Finalized the work-through of the new version of the tokenizer code.
This commit is contained in:
parent 48d7fda9f8
commit 7598b62dd0
@@ -274,7 +274,7 @@ func TestClearRunes(t *testing.T) {
 	api.Accept()   // Add to runes
 	api.NextRune() // Read 'a'
 	api.Accept()   // Add to runes
-	api.ClearRunes() // Clear the runes
+	api.ClearRunes() // Clear the runes, giving us a fresh start.
 	api.NextRune() // Read 'p'
 	api.Accept()   // Add to runes
 	api.NextRune() // Read 'r'
@@ -636,17 +636,12 @@ func MatchExcept(handler Handler, except Handler) Handler {
 // for the lookAhead handler is ignored.
 func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
 	return func(t *API) bool {
-		child := t.Fork()
 		if handler(t) {
-			subChild := t.Fork()
-			if lookAhead(t) {
-				t.Dispose(subChild)
-				t.Merge(child)
-			}
+			child := t.Fork()
+			result := lookAhead(t)
 			t.Dispose(child)
-			return true
+			return result
 		}
-		t.Dispose(child)
 		return false
 	}
 }
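A quick reading of the rewrite: the old version forked before running the handler and selectively merged results back; the new one only forks for the peek itself, runs the lookahead, always disposes the fork, and lets the lookahead's outcome decide the match. A minimal, self-contained sketch of how this surfaces through the public API (the c.FollowedBy wrapper and a.Str appear in the tests in this commit; the concrete input and the program around them are my own illustration):

	package main

	import (
		"fmt"

		tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
	)

	func main() {
		c, a := tokenize.C, tokenize.A

		// Match "foo" only when "bar" directly follows it. The lookahead
		// runs against a fork that is disposed afterwards, so "bar" is
		// neither consumed nor part of the matched output.
		fooBeforeBar := c.FollowedBy(a.Str("bar"), a.Str("foo"))

		tokenizer := tokenize.New(fooBeforeBar)
		if api, err := tokenizer("foobar"); err == nil {
			fmt.Println(api.String()) // foo
		}
	}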
@@ -657,17 +652,12 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
 // the handler is accepted.
 func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
 	return func(t *API) bool {
-		child := t.Fork()
 		if handler(t) {
-			subChild := t.Fork()
-			if !lookAhead(t) {
-				t.Dispose(subChild)
-				t.Merge(child)
-				t.Dispose(child)
-				return true
-			}
+			child := t.Fork()
+			result := !lookAhead(t)
+			t.Dispose(child)
+			return result
 		}
-		t.Dispose(child)
 		return false
 	}
 }
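MatchNotFollowedBy is the same shape with the peek negated. Dropped into the sketch above in place of fooBeforeBar (again my own example data, mirroring the test expectations further down):

	// Match a single 'x', but only when it is not directly followed by
	// 'a': on "xx" this matches and yields "x"; on "xa" it fails as a whole.
	xNotBeforeA := c.NotFollowedBy(a.Rune('a'), a.Rune('x'))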
@@ -681,7 +671,7 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
 //
 // Without flushing the input, the input reader will allocate memory
 // during the parsing process, eventually enough to hold the full input
-// in memory. By wrapping Handlers with DoFlushInput, you can tell parsekit
+// in memory. By wrapping Handlers with an input flusher, you can tell parsekit
 // that the accumulated input so far will no longer be needed, allowing
 // this input to be flushed from memory.
 //
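The comment above states the trade-off: without flushing, the reader buffer grows toward the full input size. As a hedged sketch of how a grammar would opt in (c.FlushInput is the wrapper exercised by TestInputFlusherWrapper at the bottom of this commit; the line-matching grammar here is my own assumption):

	c, a := tokenize.C, tokenize.A

	// After every complete line, tell parsekit the accumulated input is
	// no longer needed, so the reader can flush it and keep memory flat.
	line := c.FlushInput(c.Seq(c.OneOrMore(c.Not(a.Rune('\n'))), a.Rune('\n')))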
@@ -1203,7 +1193,6 @@ func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
 	})
 }
 
-// TODO Use better interpreter from parser code?
 func interpretString(str string) (string, error) {
 	var sb strings.Builder
 	for len(str) > 0 {
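The hunk above cuts off right after the scan loop opens. One plausible completion of interpretString, assuming it leans on the standard library rather than the parser code the removed TODO mentioned: strconv.UnquoteChar decodes \n and \uXXXX forms, returns an error on an illegal escape like \g (leaving the partial result, as the StrInterpreted tests below expect), and WriteRune turns a lone surrogate such as \uD801 into U+FFFD.

	import (
		"strconv"
		"strings"
	)

	func interpretString(str string) (string, error) {
		var sb strings.Builder
		for len(str) > 0 {
			// Decode one (possibly escaped) character from the front.
			r, _, tail, err := strconv.UnquoteChar(str, 0)
			if err != nil {
				// Hand back what was interpreted so far, plus the error.
				return sb.String(), err
			}
			sb.WriteRune(r)
			str = tail
		}
		return sb.String(), nil
	}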
@@ -19,8 +19,10 @@ func TestCombinatorsTempDebug(t *testing.T) {
 func TestCombinators(t *testing.T) {
 	var c, a, m = tokenize.C, tokenize.A, tokenize.M
 	AssertHandlers(t, []HandlerT{
 		{"", c.Not(a.Rune('b')), false, ""},
 		{"abc not", c.Not(a.Rune('b')), true, "a"},
 		{"bcd not", c.Not(a.Rune('b')), false, ""},
 		{"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"},
+		{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
+		{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
 		{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
@@ -71,8 +73,20 @@ func TestCombinators(t *testing.T) {
 		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
 		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
 		{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
-		{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
-		{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
+		{" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
+		{"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
+		{" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
+		{"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"},
+		{"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
+		{"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
+		{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"},
+		{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""},
+		{"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
+		{"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
+		{"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
+		{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
+		{"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""},
+		{"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"},
 	})
 }
@@ -119,8 +133,10 @@ func TestAtoms(t *testing.T) {
 		{"\xbc with AnyRune", a.AnyRune, true, "�"},
+		{"", a.AnyRune, false, ""},
 		{"⌘", a.ValidRune, true, "⌘"},
-		{"\xbc with ValidRune", a.ValidRune, false, "�"},
+		{"\xbc with ValidRune", a.ValidRune, false, ""},
+		{"", a.ValidRune, false, ""},
 		{"\xbc with InvalidRune", a.InvalidRune, true, "�"},
 		{"ok with InvalidRune", a.InvalidRune, false, ""},
 		{" ", a.Space, true, " "},
 		{"X", a.Space, false, ""},
 		{"\t", a.Tab, true, "\t"},
@@ -234,6 +250,7 @@ func TestAtoms(t *testing.T) {
 		{"0", a.IntegerBetween(-10, 10), true, "0"},
 		{"10", a.IntegerBetween(-10, 10), true, "10"},
 		{"11", a.IntegerBetween(0, 10), false, ""},
+		{"fifteen", a.IntegerBetween(0, 10), false, ""},
 	})
 }
@@ -250,7 +267,11 @@ func TestIPv4Atoms(t *testing.T) {
 		{"256123", tokenize.MatchOctet(false), false, ""},
 		{"300", tokenize.MatchOctet(false), false, ""},
 
-		// Normalized octet.
+		// Octet.
+		{"0", tokenize.MatchOctet(false), true, "0"},
+		{"02", tokenize.MatchOctet(false), true, "02"},
+		{"003", tokenize.MatchOctet(false), true, "003"},
+		{"256", tokenize.MatchOctet(false), false, ""},
 		{"0X", a.Octet, true, "0"},
 		{"00X", a.Octet, true, "0"},
 		{"000X", a.Octet, true, "0"},
@@ -261,6 +282,8 @@ func TestIPv4Atoms(t *testing.T) {
 		{"300", a.Octet, false, ""},
 
 		// IPv4 address.
+		{"0.0.0.0", tokenize.MatchIPv4(false), true, "0.0.0.0"},
+		{"010.0.255.01", tokenize.MatchIPv4(false), true, "010.0.255.01"},
 		{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
 		{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
 		{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
@@ -268,6 +291,8 @@ func TestIPv4Atoms(t *testing.T) {
 		{"256.255.255.255", a.IPv4, false, ""},
 
 		// IPv4 CIDR netmask.
+		{"0", tokenize.MatchIPv4CIDRMask(false), true, "0"},
+		{"000", tokenize.MatchIPv4CIDRMask(false), true, "000"},
 		{"0", a.IPv4CIDRMask, true, "0"},
 		{"00", a.IPv4CIDRMask, true, "0"},
 		{"000", a.IPv4CIDRMask, true, "0"},
@@ -276,6 +301,8 @@ func TestIPv4Atoms(t *testing.T) {
 		{"33", a.IPv4CIDRMask, false, ""},
 
 		// IPv4 netmask in dotted quad format.
+		{"0.0.0.0", tokenize.MatchIPv4Netmask(false), true, "0.0.0.0"},
+		{"255.128.000.000", tokenize.MatchIPv4Netmask(false), true, "255.128.000.000"},
 		{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
 		{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
 		{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
@@ -283,6 +310,8 @@ func TestIPv4Atoms(t *testing.T) {
 
 		// IPv4 address + CIDR or dotted quad netmask.
 		{"192.168.6.123", a.IPv4Net, false, ""},
+		{"192.168.6.123/24", tokenize.MatchIPv4Net(false), true, "192.168.6.123/24"},
+		{"001.002.003.004/016", tokenize.MatchIPv4Net(false), true, "001.002.003.004/016"},
 		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
 		{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
 		{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
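These cases pin down the normalization contract: the raw tokenize.Match*(false) handlers return the input verbatim, while the a.* atoms strip leading zeros and rewrite a dotted-quad netmask as a CIDR prefix length. A sketch built from the test data above (tokenize.New and api.String are used the same way elsewhere in this commit):

	tokenizer := tokenize.New(tokenize.A.IPv4Net)
	if api, err := tokenizer("192.168.6.123/255.255.255.0"); err == nil {
		fmt.Println(api.String()) // 192.168.6.123/24
	}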
@@ -323,6 +352,8 @@ func TestIPv6Atoms(t *testing.T) {
 func TestModifiers(t *testing.T) {
 	var c, a, m = tokenize.C, tokenize.A, tokenize.M
 	AssertHandlers(t, []HandlerT{
+		{"missed me!", m.Drop(a.Rune('w')), false, ""},
 		{"where are you?", m.Drop(a.Rune('w')), true, ""},
 		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
+		{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
 		{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
@@ -332,6 +363,7 @@ func TestModifiers(t *testing.T) {
 		{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
 		{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
 		{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
+		{"abcdefghijk", m.ByCallback(a.Str("xyz"), func(s string) string { return "X" }), false, ""},
 		{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
 		{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
 	})
@@ -363,6 +395,12 @@ func TestTokenMakers(t *testing.T) {
 		{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
 			[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},
 
+		{`\uD801 invalid rune`, tok.StrInterpreted("D", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "D", Value: "� invalid rune"}}},
+
+		// I don't check the returned error here, but it's good enough to see that the parsing
+		// stopped after the illegal \g escape sequence.
+		{`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}},
+
 		{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
 		{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
 			{Type: "bar", Value: byte('R')},
@@ -410,9 +448,38 @@ func TestTokenMakers(t *testing.T) {
 			{Type: "P", Value: false},
 			{Type: "P", Value: false},
 		}},
+
+		{`anything`, tok.ByValue("Q", c.OneOrMore(a.AnyRune), "Kaboom!"), []tokenize.Token{{Type: "Q", Value: "Kaboom!"}}},
 	})
 }
 
+func TestTokenGroup_Match(t *testing.T) {
+	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
+	tokenizer := tokenize.New(tok.Group("Group",
+		c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))))
+
+	api, err := tokenizer("xxxxx")
+	AssertTrue(t, err == nil, "Tokenizer result")
+	tokens := api.Tokens()
+	AssertEqual(t, 1, len(tokens), "Length of tokens slice")
+	contained := tokens[0].Value.([]tokenize.Token)
+	AssertEqual(t, 3, len(contained), "Length of contained tokens")
+	AssertEqual(t, 1, contained[0].Type.(int), "Value of contained Token 1")
+	AssertEqual(t, 2, contained[1].Type.(int), "Value of contained Token 2")
+	AssertEqual(t, 3, contained[2].Type.(int), "Value of contained Token 3")
+}
+
+func TestTokenGroup_Mismatch(t *testing.T) {
+	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
+	tokenizer := tokenize.New(tok.Group("Group",
+		c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))).Optional())
+
+	api, err := tokenizer("12345")
+	AssertTrue(t, err == nil, "Tokenizer result")
+	tokens := api.Tokens()
+	AssertEqual(t, 0, len(tokens), "Length of tokens slice")
+}
+
 // I know, this is hell, but that's the whole point for this test :->
 func TestCombination(t *testing.T) {
 	var c, a, m = tokenize.C, tokenize.A, tokenize.M
@@ -6,8 +6,8 @@ import (
 
 // Token defines a lexical token as produced by tokenize.Handlers.
 //
-// The only mandatory data in a Token are the Runes. The Type and Value fields
-// are optional fields that can be filled with data at will.
+// The Type and Value fields are optional fields that can be filled
+// with data at will.
 //
 // The use of the Type field is to let a tokenizer communicate to
 // the parser what type of token it's handling.
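Concretely, per the ExampleToken_String test added later in this commit:

	// The Type tells a parser what kind of token it is looking at; the
	// Value carries the optional payload.
	t := tokenize.Token{Type: "Name", Value: "Peter Pan"}
	fmt.Println(t) // Name("Peter Pan")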
@@ -30,12 +30,12 @@ func (t Token) String() string {
 	value := ""
 	if t.Value != nil {
 		switch t.Value.(type) {
-		case []*Token:
+		case []Token:
 			return fmt.Sprintf("%v%v", tokenType, t.Value)
 		case string:
 			value = fmt.Sprintf("%q", t.Value)
 		case rune:
-			value = fmt.Sprintf("%v", t.Value)
+			value = fmt.Sprintf("'%c'", t.Value)
 		case bool:
 			value = fmt.Sprintf("%v", t.Value)
 		default:
@@ -0,0 +1,31 @@
+package tokenize2_test
+
+import (
+	"fmt"
+
+	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
+)
+
+func ExampleToken_String() {
+	fmt.Println(tokenize.Token{Type: "Name", Value: "Peter Pan"})
+
+	fmt.Println(tokenize.Token{Type: "Gender", Value: 'm'})
+
+	fmt.Println(tokenize.Token{Type: "CanFly", Value: true})
+
+	fmt.Println(tokenize.Token{Type: "Friends", Value: []tokenize.Token{
+		{Type: "Name", Value: "Tinkerbell"},
+		{Type: "Name", Value: "Tootles"},
+		{Type: "Name", Value: "Slightly"},
+		{Type: "Name", Value: "Nibs"},
+	}})
+
+	fmt.Println(tokenize.Token{Type: "FirstMovieYear", Value: 1924})
+
+	// Output:
+	// Name("Peter Pan")
+	// Gender('m')
+	// CanFly(true)
+	// Friends[Name("Tinkerbell") Name("Tootles") Name("Slightly") Name("Nibs")]
+	// FirstMovieYear((int)1924)
+}
@@ -95,6 +95,27 @@ func TestFlushInput(t *testing.T) {
 	AssertEqual(t, "cool", api.String(), "end result")
 }
 
+func TestInputFlusherWrapper(t *testing.T) {
+	runeA := A.Rune('a')
+	flushB := C.FlushInput(A.Rune('b'))
+	api := NewAPI("abaab")
+	runeA(api)
+	AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
+	AssertEqual(t, "a", api.String(), "runes after 1 read")
+	flushB(api)
+	AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
+	AssertEqual(t, "ab", api.String(), "runes after 2 reads")
+	runeA(api)
+	AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
+	AssertEqual(t, "aba", api.String(), "runes after 3 reads")
+	runeA(api)
+	AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
+	AssertEqual(t, "abaa", api.String(), "runes after 4 reads")
+	flushB(api)
+	AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
+	AssertEqual(t, "abaab", api.String(), "runes after 5 reads")
+}
+
 func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
 	if expected != actual {
 		t.Errorf(