go-parsekit/tokenizer_test.go

package parsekit

import (
	"fmt"
	"io"
	"strings"
	"testing"
	"unicode/utf8"
)

// TODO For error handling, it would be really cool if, for example, the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// The biggest thing to take care of here is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching using parsekit.C.Any(...) for example. If matching
// for Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
func ExampleTokenizer_Execute() {
	// Compose a tokenizer for "<ip>/<mask>": a string token for the IPv4
	// address, a literal slash, and an int8 token for the CIDR mask.
	addr := T.Str("ip", A.IPv4)
	bits := T.Int8("mask", A.IPv4CIDRMask)
	sequence := C.Seq(addr, A.Slash, bits)
	tokenizer := NewTokenizer(sequence)

	inputs := []string{
		"000.000.000.000/000",
		"192.168.0.1/24",
		"255.255.255.255/32",
		"10.0.300.1/24",
		"not an IPv4 CIDR",
	}
	for _, input := range inputs {
		// Execute hands back a result and an error; the error is nil on success.
		result, err := tokenizer.Execute(input)
		if err != nil {
			fmt.Printf("Error: %s\n", err)
			continue
		}
		fmt.Printf("Result: %s\n", result.Tokens())
	}
	// Output:
	// Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)
	// Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)
	// Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)
	// Error: unexpected input
	// Error: unexpected input
}

// TestCallingNextRune_ReturnsNextRune checks that the first NextRune() call
// on a fresh test input yields the first rune of "Testing".
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
	input := mkInput()
	first, _ := input.NextRune()
	AssertEqual(t, 'T', first, "first rune")
}

// TestInputCanAcceptRunesFromReader reads and accepts three runes in a row
// and checks that they show up, in order, in the result string.
func TestInputCanAcceptRunesFromReader(t *testing.T) {
	input := mkInput()
	for n := 0; n < 3; n++ {
		input.NextRune()
		input.Accept()
	}
	AssertEqual(t, "Tes", input.Result().String(), "i.Result().String()")
}

func TestCallingNextRuneTwice_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			i.NextRune()
			i.NextRune()
		},
		Regexp: true,
		Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
	})
}

// TestCallingAcceptWithoutCallingNextRune_Panics checks that Accept() panics
// when no rune has been read with NextRune() first.
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		// Accept is passed as a method value, so it is not called from this
		// file; the expected call site is assertions_test.go (presumably
		// where AssertPanic invokes Function).
		Function: mkInput().Accept,
		Regexp:   true,
		// The trailing parentheses are escaped: an unescaped "()" is an empty
		// regexp group and would leave the literal "()" unchecked.
		Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune\(\)`,
	})
}

func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			i.Merge()
		},
		Regexp: true,
		Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}

func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			f := i.Fork()
			i.NextRune()
			f.Merge()
		},
		Regexp: true,
		Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}

func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			f := i.Fork()
			i.Fork()
			f.Merge()
		},
		Regexp: true,
		Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}

func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
	i := mkInput()
	f1 := i.Fork()
	f2 := f1.Fork()
	f3 := f2.Fork()
	f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
	f5 := f4.Fork()
	AssertEqual(t, true, i.parent == nil, "i.parent == nil")
	AssertEqual(t, true, i.child == f1, "i.child == f1")
	AssertEqual(t, true, f1.parent == i, "f1.parent == i")
	AssertEqual(t, true, f1.child == f4, "f1.child == f4")
	AssertEqual(t, true, f2.child == nil, "f2.child == nil")
	AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
	AssertEqual(t, true, f3.child == nil, "f3.child == nil")
	AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
	AssertEqual(t, true, f4.parent == f1, "f4.parent == f1")
	AssertEqual(t, true, f4.child == f5, "f4.child == f5")
	AssertEqual(t, true, f5.parent == f4, "f5.parent == f4")
	AssertEqual(t, true, f5.child == nil, "f5.child == nil")

	i.NextRune()

	AssertEqual(t, true, i.parent == nil, "i.parent == nil")
	AssertEqual(t, true, i.child == nil, "i.child == nil")
	AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
	AssertEqual(t, true, f1.child == nil, "f1.child == nil")
	AssertEqual(t, true, f2.child == nil, "f2.child == nil")
	AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
	AssertEqual(t, true, f3.child == nil, "f3.child == nil")
	AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
	AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
	AssertEqual(t, true, f4.child == nil, "f4.child == nil")
	AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
	AssertEqual(t, true, f5.child == nil, "f5.child == nil")
}

// TestForkingInput_ClearsLastRune checks that Fork() discards a rune that was
// read but not yet accepted: an Accept() after the Fork() must panic as if
// NextRune() had never been called.
func TestForkingInput_ClearsLastRune(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			i.NextRune()
			i.Fork()
			i.Accept()
		},
		Regexp: true,
		// Match any directory prefix, so the test does not depend on where
		// the repository is checked out.
		Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
	})
}

// TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward walks
// through one NextRune()/Accept() cycle and inspects the internal state
// (lastRune, offset, reader buffer) before reading the following rune.
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
	input := mkInput()

	first, _ := input.NextRune()
	AssertEqual(t, 'T', first, "result from 1st call to NextRune()")
	AssertTrue(t, input.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil")

	input.Accept()
	AssertTrue(t, input.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
	AssertEqual(t, 1, input.result.offset, "TokenAPI.result.offset")
	AssertEqual(t, 'T', input.reader.buffer[0], "TokenAPI.reader.buffer[0]")

	second, _ := input.NextRune()
	AssertEqual(t, 'e', second, "result from 2nd call to NextRune()")
}

// TestCallingMultipleAccepts_FillsInputWithData accepts all seven runes of
// the test input and checks both the reader buffer and the result string.
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
	input := mkInput()
	for remaining := 7; remaining > 0; remaining-- {
		input.NextRune()
		input.Accept()
	}
	buffered := string(input.reader.buffer)
	AssertEqual(t, "Testing", buffered, "reader input buffer")
	AssertEqual(t, "Testing", input.Result().String(), "i.Result().String()")
}

// TestAccept_UpdatesCursor accepts runes from an input with CRLF line endings
// and checks the cursor's textual position at four points.
func TestAccept_UpdatesCursor(t *testing.T) {
	input := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))

	// accept reads and accepts the given number of runes.
	accept := func(count int) {
		for ; count > 0; count-- {
			input.NextRune()
			input.Accept()
		}
	}
	position := func() string { return input.result.cursor.String() }

	AssertEqual(t, "start of file", position(), "cursor 1")

	accept(6) // read "input\r", cursor ends up at "\n"
	AssertEqual(t, "line 1, column 7", position(), "cursor 2")

	accept(1) // read "\n", cursor ends up at start of new line
	AssertEqual(t, "line 2, column 1", position(), "cursor 3")

	accept(10) // read "with\r\nnewl", cursor ends up at "i"
	AssertEqual(t, "line 3, column 5", position(), "cursor 4")
}

// TestFork_CreatesForkOfInputAtSameCursorPosition checks that a fork starts at
// the parent's cursor position, that accepting runes on the fork leaves the
// parent's cursor alone, and that Merge() brings the fork's runes and cursor
// back into the parent.
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
	// Create input, accept the first rune.
	i := mkInput()
	i.NextRune()
	i.Accept() // T
	AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
	// Fork
	f := i.Fork()
	AssertEqual(t, f, i.child, "Input.child (must be f)")
	AssertEqual(t, i, f.parent, "Input.parent (must be i)")
	AssertEqual(t, 1, i.result.cursor.Byte, "i.cursor.Byte after Fork()")
	AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.cursor.Byte after Fork()")
	// Accept two runes via fork.
	f.NextRune()
	f.Accept() // e
	f.NextRune()
	f.Accept() // s
	AssertEqual(t, "es", f.Result().String(), "result runes in fork")
	// Only the fork's cursor moves; the parent stays where it was.
	AssertEqual(t, 1, i.result.cursor.Byte, "i.cursor.Byte after accepting in fork")
	AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.cursor.Byte after accepting in fork")
	// Merge fork back into parent
	f.Merge()
	AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
	AssertEqual(t, 3, i.result.cursor.Byte, "i.cursor.Byte after Merge()")
}

// TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
// accepts one rune on each of three nested levels (root, fork, fork of fork)
// and checks result strings and offsets before merging, after merging the
// innermost fork, and after merging the outer fork.
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
	root := mkInput()
	root.NextRune()
	root.Accept()
	f1 := root.Fork()
	f1.NextRune()
	f1.Accept()
	f2 := f1.Fork()
	f2.NextRune()
	f2.Accept()

	// expect asserts the result string and offset of one level; name and
	// stage only feed the assertion labels.
	expect := func(api *TokenAPI, name, stage, wantResult string, wantOffset int) {
		AssertEqual(t, wantResult, api.Result().String(), name+".Result().String()")
		AssertEqual(t, wantOffset, api.result.offset, name+".offset "+stage)
	}

	// A: nothing merged yet, every level holds its own single rune.
	expect(root, "i", "A", "T", 1)
	expect(f1, "f1", "A", "e", 2)
	expect(f2, "f2", "A", "s", 3)

	// B: the innermost fork has been merged into f1.
	f2.Merge()
	expect(root, "i", "B", "T", 1)
	expect(f1, "f1", "B", "es", 3)
	expect(f2, "f2", "B", "", 3)

	// C: f1 has been merged into the root.
	f1.Merge()
	expect(root, "i", "C", "Tes", 3)
	expect(f1, "f1", "C", "", 3)
	expect(f2, "f2", "C", "", 3)
}

// TestWhenCallingNextruneAtEndOfFile_EOFIsReturned consumes the only rune of
// a one-rune input and checks that the next read reports utf8.RuneError
// together with io.EOF.
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
	input := NewTokenAPI(strings.NewReader("X"))
	input.NextRune()
	input.Accept()

	got, err := input.NextRune()
	isRuneError := got == utf8.RuneError
	isEOF := err == io.EOF
	AssertEqual(t, true, isRuneError, "returned rune from NextRune()")
	AssertEqual(t, true, isEOF, "returned error from NextRune()")
}
// TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed lets a fork
// read past the end of a one-rune input, then checks that the parent can still
// read that first rune.
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := NewTokenAPI(strings.NewReader("X"))
	f := i.Fork()
	f.NextRune()
	f.Accept()
	// The fork has consumed the only rune, so its second read hits end of file.
	r, err := f.NextRune()
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune() on fork")
	AssertEqual(t, true, err == io.EOF, "returned error from 2nd NextRune() on fork")
	// The parent has not accepted anything yet, so it still sees the first rune.
	r, err = i.NextRune()
	AssertEqual(t, 'X', r, "returned rune from NextRune() on parent")
	AssertEqual(t, true, err == nil, "returned error from NextRune() on parent")
}

// mkInput returns a fresh TokenAPI that reads from the fixed string "Testing",
// which the tests in this file use as their shared input.
func mkInput() *TokenAPI {
	source := strings.NewReader("Testing")
	return NewTokenAPI(source)
}