From d96511ce0a6c6861ae9510393a4d9d26e6b57fe4 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Wed, 3 Jul 2019 15:46:43 +0000 Subject: [PATCH] Backup work. --- tokenize/api_test.go | 4 +- tokenize/handler_test.go | 10 +- tokenize/handlers_builtin.go | 173 +++++++++++++++++++++++------------ tokenize/result.go | 15 ++- tokenize/result_test.go | 16 ++-- tokenize/tokenizer_test.go | 6 +- 6 files changed, 142 insertions(+), 82 deletions(-) diff --git a/tokenize/api_test.go b/tokenize/api_test.go index 3bdb4c7..0ce2389 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -73,8 +73,8 @@ func ExampleAPI_Result() { // API result runes as string: "new set of runes" // API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's'] // API third rune: 'w' - // API result tokens: [42("demo 1", value = (string)towel) 73("demo 2", value = (string)Zaphod)] - // API second result token: 73("demo 2", value = (string)Zaphod) + // API result tokens: [42((string)towel) 73((string)Zaphod)] + // API second result token: 73((string)Zaphod) } func ExampleAPI_Reset() { diff --git a/tokenize/handler_test.go b/tokenize/handler_test.go index c47737c..31286fd 100644 --- a/tokenize/handler_test.go +++ b/tokenize/handler_test.go @@ -62,11 +62,11 @@ func ExampleHandler_SeparatedBy() { fmt.Printf("[%d] %v\n", i, token) } // Output: - // [0] number("123", value = (int)123) - // [1] number("456", value = (int)456) - // [2] number("7", value = (int)7) - // [3] number("8", value = (int)8) - // [4] number("9", value = (int)9) + // [0] number((int)123) + // [1] number((int)456) + // [2] number((int)7) + // [3] number((int)8) + // [4] number((int)9) } func ExampleHandler_Optional() { diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index d4111f9..0674aa1 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -26,35 +26,35 @@ import ( // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. var C = struct { - Any func(...Handler) Handler - Not func(Handler) Handler - Seq func(...Handler) Handler - Min func(min int, handler Handler) Handler - Max func(max int, handler Handler) Handler - Repeated func(times int, handler Handler) Handler - Optional func(Handler) Handler - ZeroOrMore func(Handler) Handler - OneOrMore func(Handler) Handler - MinMax func(min int, max int, handler Handler) Handler - Separated func(separated Handler, separator Handler) Handler - Except func(except Handler, handler Handler) Handler - FollowedBy func(lookAhead Handler, handler Handler) Handler - WhileFollowedBy func(lookahead Handler, handler Handler) Handler + Any func(...Handler) Handler + Not func(Handler) Handler + Seq func(...Handler) Handler + Min func(min int, handler Handler) Handler + Max func(max int, handler Handler) Handler + Repeated func(times int, handler Handler) Handler + Optional func(Handler) Handler + ZeroOrMore func(Handler) Handler + OneOrMore func(Handler) Handler + MinMax func(min int, max int, handler Handler) Handler + Separated func(separated Handler, separator Handler) Handler + Except func(except Handler, handler Handler) Handler + FollowedBy func(lookAhead Handler, handler Handler) Handler + NotFollowedBy func(lookAhead Handler, handler Handler) Handler }{ - Any: MatchAny, - Not: MatchNot, - Seq: MatchSeq, - Min: MatchMin, - Max: MatchMax, - Repeated: MatchRep, - Optional: MatchOptional, - ZeroOrMore: MatchZeroOrMore, - OneOrMore: MatchOneOrMore, - MinMax: MatchMinMax, - Separated: MatchSeparated, - Except: MatchExcept, - FollowedBy: MatchFollowedBy, - WhileFollowedBy: MatchWhileFollowedBy, + Any: MatchAny, + Not: MatchNot, + Seq: MatchSeq, + Min: MatchMin, + Max: MatchMax, + Repeated: MatchRep, + Optional: MatchOptional, + ZeroOrMore: MatchZeroOrMore, + OneOrMore: MatchOneOrMore, + MinMax: MatchMinMax, + Separated: MatchSeparated, + Except: MatchExcept, + FollowedBy: MatchFollowedBy, + NotFollowedBy: MatchNotFollowedBy, } // A provides convenient access to a range of atoms or functions to build atoms. @@ -290,15 +290,19 @@ var T = struct { Int16 func(interface{}, Handler) Handler Int32 func(interface{}, Handler) Handler Int64 func(interface{}, Handler) Handler + Int64Base func(interface{}, int, Handler) Handler Uint func(interface{}, Handler) Handler Uint8 func(interface{}, Handler) Handler Uint16 func(interface{}, Handler) Handler Uint32 func(interface{}, Handler) Handler Uint64 func(interface{}, Handler) Handler + Uint64Base func(interface{}, int, Handler) Handler Float32 func(interface{}, Handler) Handler Float64 func(interface{}, Handler) Handler Boolean func(interface{}, Handler) Handler - ByCallback func(Handler, func(t *API) *Token) Handler + ByValue func(toktype interface{}, handler Handler, value interface{}) Handler + ByCallback func(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler + Group func(interface{}, Handler) Handler }{ Str: MakeStrLiteralToken, StrInterpreted: MakeStrInterpretedToken, @@ -309,15 +313,19 @@ var T = struct { Int16: MakeInt16Token, Int32: MakeInt32Token, Int64: MakeInt64Token, + Int64Base: MakeInt64BaseToken, Uint: MakeUintToken, Uint8: MakeUint8Token, Uint16: MakeUint16Token, Uint32: MakeUint32Token, Uint64: MakeUint64Token, + Uint64Base: MakeUint64BaseToken, Float32: MakeFloat32Token, Float64: MakeFloat64Token, Boolean: MakeBooleanToken, + ByValue: MakeTokenByValue, ByCallback: MakeTokenByCallback, + Group: MakeTokenGroup, } // MatchRune creates a Handler function that matches against the provided rune. @@ -605,16 +613,15 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { } } -// TODO keep this? Make some useful tests first. -func MatchWhileFollowedBy(lookAhead Handler, handler Handler) Handler { - followedBy := MatchFollowedBy(lookAhead, handler) +// TODO keep this? +func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler { return func(t *API) bool { - matches := 0 - for followedBy(t) { - fmt.Printf("Matches so far: %q\n", t.Result().String()) - matches++ + child := t.Fork() + if handler(child) && !lookAhead(child.Fork()) { + child.Merge() + return true } - return matches > 0 + return false } } @@ -1096,9 +1103,9 @@ func ModifyByCallback(handler Handler, modfunc func(string) string) Handler { // escape sequence like "\n" is kept as-is (a backslash character, followed by // an 'n'-character). func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler { - return MakeTokenByCallback(handler, func(t *API) *Token { + return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { literal := t.Result().String() - return &Token{Type: toktype, Runes: t.Result().Runes(), Value: literal} + return literal }) } @@ -1107,10 +1114,10 @@ func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler { // representation of the read Runes. This string is interpreted, meaning that an // escape sequence like "\n" is translated to an actual newline control character func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler { - return MakeTokenByCallback(handler, func(t *API) *Token { + return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { // TODO ERROR HANDLING interpreted, _ := interpretString(t.Result().String()) - return &Token{Type: toktype, Runes: t.Result().Runes(), Value: interpreted} + return interpreted }) } @@ -1131,9 +1138,9 @@ func interpretString(str string) (string, error) { // Result, for which the Token.Value is set to a Rune-representation // of the read Rune. func MakeRuneToken(toktype interface{}, handler Handler) Handler { - return MakeTokenByCallback(handler, func(t *API) *Token { + return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { // TODO ERROR HANDLING --- not a 1 rune input - return &Token{Type: toktype, Runes: t.Result().Runes(), Value: t.Result().Rune(0)} + return t.Result().Rune(0) }) } @@ -1141,9 +1148,9 @@ func MakeRuneToken(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to a Byte-representation // of the read Rune. func MakeByteToken(toktype interface{}, handler Handler) Handler { - return MakeTokenByCallback(handler, func(t *API) *Token { + return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { // TODO ERROR HANDLING --- not a 1 byte input - return &Token{Type: toktype, Runes: t.Result().Runes(), Value: byte(t.Result().Rune(0))} + return byte(t.Result().Rune(0)) }) } @@ -1199,13 +1206,14 @@ func MakeInt32Token(toktype interface{}, handler Handler) Handler { }) } -// MakeInt64Token creates a Handler that will add a Token to the +// MakeInt64BaseToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an int64-representation -// of the read Rune. -func MakeInt64Token(toktype interface{}, handler Handler) Handler { +// of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal, +// 10 = decimal, 16 = hexadecimal). +func MakeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler { return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) { - value, err := strconv.ParseInt(s, 10, 64) + value, err := strconv.ParseInt(s, base, 64) if err == nil { return int64(value), err } @@ -1213,6 +1221,13 @@ func MakeInt64Token(toktype interface{}, handler Handler) Handler { }) } +// MakeInt64Token creates a Handler that will add a Token to the +// Result, for which the Token.Value is set to an int64-representation +// of the read Rune. +func MakeInt64Token(toktype interface{}, handler Handler) Handler { + return MakeInt64BaseToken(toktype, 10, handler) +} + // MakeUintToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an uint-representation // of the read Rune. @@ -1270,13 +1285,14 @@ func MakeUint32Token(toktype interface{}, handler Handler) Handler { }) } -// MakeUint64Token creates a Handler that will add a Token to the +// MakeUint64BaseToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an uint64-representation -// of the read Rune. -func MakeUint64Token(toktype interface{}, handler Handler) Handler { +// of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal, +// 10 = decimal, 16 = hexadecimal). +func MakeUint64BaseToken(toktype interface{}, base int, handler Handler) Handler { return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) { - value, err := strconv.ParseUint(s, 10, 64) + value, err := strconv.ParseUint(s, base, 64) if err == nil { return uint64(value), err } @@ -1284,6 +1300,13 @@ func MakeUint64Token(toktype interface{}, handler Handler) Handler { }) } +// MakeUint64Token creates a Handler that will add a Token to the +// Result, for which the Token.Value is set to an uint64-representation +// of the read Rune. +func MakeUint64Token(toktype interface{}, handler Handler) Handler { + return MakeUint64BaseToken(toktype, 10, handler) +} + // MakeFloat32Token creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an float32-representation // of the read Rune. @@ -1331,7 +1354,7 @@ func makeStrconvToken(toktype interface{}, handler Handler, convert func(s strin fullName := runtime.FuncForPC(pc).Name() parts := strings.Split(fullName, ".") name := parts[len(parts)-1] - return MakeTokenByCallback(handler, func(t *API) *Token { + return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { value, err := convert(t.Result().String()) if err != nil { // TODO meh, panic feels so bad here. Maybe just turn this case into "no match"? @@ -1340,20 +1363,50 @@ func makeStrconvToken(toktype interface{}, handler Handler, convert func(s strin "(only use a type conversion token maker, when the input has been "+ "validated on beforehand)", name, t.Result().String(), err)) } - return &Token{Type: toktype, Runes: t.Result().Runes(), Value: value} + return value }) } +// MakeTokenByValue creates a Handler that will add a static Token value +// to the Result. +func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) Handler { + return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { return value }) +} + // MakeTokenByCallback creates a Handler that will add a Token to the -// Result, for which the Token is to be generated by the provided -// callback function. The function gets the current API as its input and -// must return a complete Token. -func MakeTokenByCallback(handler Handler, callback func(t *API) *Token) Handler { +// Result, for which the Token.Value is to be generated by the provided +// makeValue() callback function. The function gets the current API as +// its input and must return the token value. +func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler { return func(t *API) bool { child := t.Fork() if handler(child) { - t.Result().AddTokens(callback(child)) + // The token is not added to the child here. The child might have produced its own + // tokens and we want those to come after the token for the current parsing level. + // By adding the token to the input API and then merging the child tokens, the order + // of the tokens will match the expectations. + // e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the + // tokens will end up in the order "date", "year", "month", "day". When we'd add the + // token to the child here, the order would have been "year", "month", "day", "date". + token := &Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)} + t.Result().AddTokens(token) child.Merge() + + return true + } + return false + } +} + +func MakeTokenGroup(toktype interface{}, handler Handler) Handler { + return func(t *API) bool { + child := t.Fork() + if handler(child) { + result := child.Result() + token := &Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()} + result.SetTokens(token) + child.Merge() + return true } return false diff --git a/tokenize/result.go b/tokenize/result.go index 47d6cfd..8519985 100644 --- a/tokenize/result.go +++ b/tokenize/result.go @@ -48,10 +48,21 @@ func (t Token) String() string { value := "" if t.Value != nil { - value = fmt.Sprintf(", value = (%T)%v", t.Value, t.Value) + switch t.Value.(type) { + case []*Token: + return fmt.Sprintf("%v%v", tokenType, t.Value) + case string: + value = fmt.Sprintf("%q", t.Value) + case rune: + value = fmt.Sprintf("%v", t.Value) + case bool: + value = fmt.Sprintf("%v", t.Value) + default: + value = fmt.Sprintf("(%T)%v", t.Value, t.Value) + } } - return fmt.Sprintf("%v(%q%s)", tokenType, string(t.Runes), value) + return fmt.Sprintf("%v(%s)", tokenType, value) } // newResult initializes an empty Result struct. diff --git a/tokenize/result_test.go b/tokenize/result_test.go index 77a371d..f188717 100644 --- a/tokenize/result_test.go +++ b/tokenize/result_test.go @@ -9,12 +9,9 @@ import ( ) func ExampleToken() { - t0 := tokenize.Token{ - Runes: []rune("10.1.2.3"), - } + t0 := tokenize.Token{} t1 := tokenize.Token{ - Runes: []rune("two hundred and twenty four"), Type: "Number", Value: 224, } @@ -22,22 +19,21 @@ func ExampleToken() { const TName = 1 t2 := tokenize.Token{ - Runes: []rune("John"), Type: TName, + Value: "John", } t3 := tokenize.Token{ - Runes: []rune("The answer"), Value: 42, } fmt.Printf("%s\n%s\n%s\n%s\n", t0, t1, t2, t3) // Output: - // ("10.1.2.3") - // Number("two hundred and twenty four", value = (int)224) - // 1("John") - // ("The answer", value = (int)42) + // () + // Number((int)224) + // 1((string)John) + // ((int)42) } func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go index c3673bc..9361798 100644 --- a/tokenize/tokenizer_test.go +++ b/tokenize/tokenizer_test.go @@ -46,9 +46,9 @@ func ExampleNew() { } } // Output: - // Result: [ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)] - // Result: [ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)] - // Result: [ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)] + // Result: [ip((string)0.0.0.0) mask((int8)0)] + // Result: [ip((string)192.168.0.1) mask((int8)24)] + // Result: [ip((string)255.255.255.255) mask((int8)32)] // Error: mismatch at start of file // Error: mismatch at start of file }