package tokenize import ( "fmt" "io" "net" "strconv" "strings" "unicode" "unicode/utf8" ) // C provides convenient access to a range of parser/combinators that can be // used to construct Handler functions. // // Parser/combinators are so called higher order functions that take in one // or more other Handler functions and output a new Handler. They can be // used to combine Handler functions in useful ways to create new more complex // Handler functions. // // When using C in your own parser, then it is advised to create a variable // to reference it, for example: // // c := tokenize.C // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. var C = struct { Any func(...Handler) Handler Not func(Handler) Handler Seq func(...Handler) Handler Min func(min int, handler Handler) Handler Max func(max int, handler Handler) Handler Repeated func(times int, handler Handler) Handler Optional func(Handler) Handler ZeroOrMore func(Handler) Handler OneOrMore func(Handler) Handler MinMax func(min int, max int, handler Handler) Handler Separated func(separator Handler, separated Handler) Handler Except func(except Handler, handler Handler) Handler FollowedBy func(lookAhead Handler, handler Handler) Handler NotFollowedBy func(lookAhead Handler, handler Handler) Handler InOptionalBlanks func(handler Handler) Handler FlushInput func(Handler) Handler }{ Any: MatchAny, Not: MatchNot, Seq: MatchSeq, Min: MatchMin, Max: MatchMax, Repeated: MatchRep, Optional: MatchOptional, ZeroOrMore: MatchZeroOrMore, OneOrMore: MatchOneOrMore, MinMax: MatchMinMax, Separated: MatchSeparated, Except: MatchExcept, FollowedBy: MatchFollowedBy, NotFollowedBy: MatchNotFollowedBy, InOptionalBlanks: MatchInOptionalBlanks, FlushInput: MakeInputFlusher, } // A provides convenient access to a range of atoms or functions to build atoms. 
//
// When using A in your own parser, then it is advised to create a variable
// to reference it:
//
//	a := tokenize.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Fields of type Handler are ready-to-use handlers, built once when this
// variable is initialized. Fields of a func type are constructors that
// build a new Handler from their arguments.
//
// Several fields are aliases that match the same character, e.g. RoundOpen
// and LeftParen both match '(', and Minus and Subtract both match '-'.
// The Integer and Decimal atoms are built with normalize set to true.
var A = struct {
	Char            func(...rune) Handler
	CharRange       func(...rune) Handler
	ByteByCallback  func(func(byte) bool) Handler
	BytesByCallback func(func(byte) bool) Handler
	RuneByCallback  func(func(rune) bool) Handler
	AnyByte         Handler
	AnyRune         Handler
	ValidRune       Handler
	ValidRunes      Handler
	InvalidRune     Handler
	Str             func(string) Handler
	StrNoCase       func(string) Handler
	EndOfLine       Handler
	EndOfFile       Handler
	UntilEndOfLine  Handler
	Space           Handler
	Tab             Handler
	CR              Handler
	LF              Handler
	CRLF            Handler
	Excl            Handler
	DoubleQuote     Handler
	Hash            Handler
	Dollar          Handler
	Percent         Handler
	Amp             Handler
	SingleQuote     Handler
	RoundOpen       Handler
	LeftParen       Handler
	RoundClose      Handler
	RightParen      Handler
	Asterisk        Handler
	Multiply        Handler
	Plus            Handler
	Add             Handler
	Comma           Handler
	Minus           Handler
	Subtract        Handler
	Dot             Handler
	Slash           Handler
	Divide          Handler
	Colon           Handler
	Semicolon       Handler
	AngleOpen       Handler
	LessThan        Handler
	Equal           Handler
	AngleClose      Handler
	GreaterThan     Handler
	Question        Handler
	At              Handler
	SquareOpen      Handler
	Backslash       Handler
	SquareClose     Handler
	Caret           Handler
	Underscore      Handler
	Backquote       Handler
	CurlyOpen       Handler
	Pipe            Handler
	CurlyClose      Handler
	Tilde           Handler
	Newline         Handler
	Blank           Handler
	Blanks          Handler
	Whitespace      Handler
	UnicodeSpace    Handler
	Digit           Handler
	DigitNotZero    Handler
	Digits          Handler
	Zero            Handler
	Boolean         Handler
	Signed          func(Handler) Handler
	Integer         Handler
	IntegerBetween  func(min int64, max int64) Handler
	Decimal         Handler
	ASCII           Handler
	ASCIILower      Handler
	ASCIIUpper      Handler
	Letter          Handler
	Lower           Handler
	Upper           Handler
	HexDigit        Handler
	Octet           Handler
	IPv4            Handler
	IPv4CIDRMask    Handler
	IPv4Netmask     Handler
	IPv4Net         Handler
	IPv6            Handler
	IPv6CIDRMask    Handler
	IPv6Net         Handler
}{
	Char:            MatchChar,
	CharRange:       MatchCharRange,
	ByteByCallback:  MatchByteByCallback,
	BytesByCallback: MatchBytesByCallback,
	RuneByCallback:  MatchRuneByCallback,
	AnyByte:         MatchAnyByte(),
	AnyRune:         MatchAnyRune(),
	ValidRune:       MatchValidRune(),
	ValidRunes:      MatchValidRunes(),
	InvalidRune:     MatchInvalidRune(),
	Str:             MatchStr,
	StrNoCase:       MatchStrNoCase,
	EndOfFile:       MatchEndOfFile(),
	EndOfLine:       MatchEndOfLine(),
	UntilEndOfLine:  MatchUntilEndOfLine(),
	Space:           MatchChar(' '),
	Tab:             MatchChar('\t'),
	CR:              MatchChar('\r'),
	LF:              MatchChar('\n'),
	CRLF:            MatchStr("\r\n"),
	Excl:            MatchChar('!'),
	DoubleQuote:     MatchChar('"'),
	Hash:            MatchChar('#'),
	Dollar:          MatchChar('$'),
	Percent:         MatchChar('%'),
	Amp:             MatchChar('&'),
	SingleQuote:     MatchChar('\''),
	RoundOpen:       MatchChar('('),
	LeftParen:       MatchChar('('),
	RoundClose:      MatchChar(')'),
	RightParen:      MatchChar(')'),
	Asterisk:        MatchChar('*'),
	Multiply:        MatchChar('*'),
	Plus:            MatchChar('+'),
	Add:             MatchChar('+'),
	Comma:           MatchChar(','),
	Minus:           MatchChar('-'),
	Subtract:        MatchChar('-'),
	Dot:             MatchChar('.'),
	Slash:           MatchChar('/'),
	Divide:          MatchChar('/'),
	Colon:           MatchChar(':'),
	Semicolon:       MatchChar(';'),
	AngleOpen:       MatchChar('<'),
	LessThan:        MatchChar('<'),
	Equal:           MatchChar('='),
	AngleClose:      MatchChar('>'),
	GreaterThan:     MatchChar('>'),
	Question:        MatchChar('?'),
	At:              MatchChar('@'),
	SquareOpen:      MatchChar('['),
	Backslash:       MatchChar('\\'),
	SquareClose:     MatchChar(']'),
	Caret:           MatchChar('^'),
	Underscore:      MatchChar('_'),
	Backquote:       MatchChar('`'),
	CurlyOpen:       MatchChar('{'),
	Pipe:            MatchChar('|'),
	CurlyClose:      MatchChar('}'),
	Tilde:           MatchChar('~'),
	Newline:         MatchNewline(),
	Blank:           MatchBlank(),
	Blanks:          MatchBlanks(),
	Whitespace:      MatchWhitespace(),
	UnicodeSpace:    MatchUnicodeSpace(),
	Digit:           MatchDigit(),
	DigitNotZero:    MatchDigitNotZero(),
	Digits:          MatchDigits(),
	Zero:            MatchChar('0'),
	Signed:          MatchSigned,
	Integer:         MatchInteger(true),
	IntegerBetween:  MatchIntegerBetween,
	Decimal:         MatchDecimal(true),
	Boolean:         MatchBoolean(),
	ASCII:           MatchASCII(),
	ASCIILower:      MatchASCIILower(),
	ASCIIUpper:      MatchASCIIUpper(),
	Letter:          MatchUnicodeLetter(),
	Lower:           MatchUnicodeLower(),
	Upper:           MatchUnicodeUpper(),
	HexDigit:        MatchHexDigit(),
	Octet:           MatchOctet(true),
	IPv4:            MatchIPv4(true),
	IPv4CIDRMask:    MatchIPv4CIDRMask(true),
	IPv4Netmask:     MatchIPv4Netmask(true),
	IPv4Net:         MatchIPv4Net(true),
	IPv6:            MatchIPv6(true),
	IPv6CIDRMask:    MatchIPv6CIDRMask(true),
	IPv6Net:         MatchIPv6Net(true),
}

// M provides convenient access to a range of modifiers (which in their nature are
// parser/combinators) that can be used when creating Handler functions.
//
// In parsekit, a modifier is defined as a Handler function that modifies the
// resulting output of another Handler in some way. It does not do any matching
// against input of its own.
//
// When using M in your own parser, then it is advised to create a variable
// to reference it:
//
//	m := tokenize.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Every field is bound to the Modify* function of the corresponding name.
var M = struct {
	Drop       func(Handler) Handler
	Trim       func(handler Handler, cutset string) Handler
	TrimLeft   func(handler Handler, cutset string) Handler
	TrimRight  func(handler Handler, cutset string) Handler
	TrimSpace  func(handler Handler) Handler
	ToLower    func(Handler) Handler
	ToUpper    func(Handler) Handler
	Replace    func(handler Handler, replaceWith string) Handler
	ByCallback func(Handler, func(string) string) Handler
}{
	Drop:       ModifyDrop,
	Trim:       ModifyTrim,
	TrimLeft:   ModifyTrimLeft,
	TrimRight:  ModifyTrimRight,
	TrimSpace:  ModifyTrimSpace,
	ToLower:    ModifyToLower,
	ToUpper:    ModifyToUpper,
	Replace:    ModifyReplace,
	ByCallback: ModifyByCallback,
}

// T provides convenient access to a range of Token producers (which in their
// nature are parser/combinators) that can be used when creating Handler
// functions.
//
// When using T in your own parser, then it is advised to create a variable
// to reference it:
//
//	t := tokenize.T
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var T = struct { Str func(interface{}, Handler) Handler StrInterpreted func(interface{}, Handler) Handler Byte func(interface{}, Handler) Handler Rune func(interface{}, Handler) Handler Int func(interface{}, Handler) Handler Int8 func(interface{}, Handler) Handler Int16 func(interface{}, Handler) Handler Int32 func(interface{}, Handler) Handler Int64 func(interface{}, Handler) Handler Int64Base func(interface{}, int, Handler) Handler Uint func(interface{}, Handler) Handler Uint8 func(interface{}, Handler) Handler Uint16 func(interface{}, Handler) Handler Uint32 func(interface{}, Handler) Handler Uint64 func(interface{}, Handler) Handler Uint64Base func(interface{}, int, Handler) Handler Float32 func(interface{}, Handler) Handler Float64 func(interface{}, Handler) Handler Boolean func(interface{}, Handler) Handler ByValue func(toktype interface{}, handler Handler, value interface{}) Handler ByCallback func(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler Group func(interface{}, Handler) Handler }{ Str: MakeStrLiteralToken, StrInterpreted: MakeStrInterpretedToken, Byte: MakeByteToken, Rune: MakeRuneToken, Int: MakeIntToken, Int8: MakeInt8Token, Int16: MakeInt16Token, Int32: MakeInt32Token, Int64: MakeInt64Token, Int64Base: MakeInt64BaseToken, Uint: MakeUintToken, Uint8: MakeUint8Token, Uint16: MakeUint16Token, Uint32: MakeUint32Token, Uint64: MakeUint64Token, Uint64Base: MakeUint64BaseToken, Float32: MakeFloat32Token, Float64: MakeFloat64Token, Boolean: MakeBooleanToken, ByValue: MakeTokenByValue, ByCallback: MakeTokenByCallback, Group: MakeTokenGroup, } func MatchChar(expected ...rune) Handler { if len(expected) == 0 { callerPanic("MatchChar", "Handler: {name} definition error at {caller}: at least one character must be provided") } if len(expected) == 1 { return matchAgainstSingleChar(expected[0]) } return matchAgainstMultipleChars(expected) } func matchAgainstSingleChar(expected rune) Handler { // Handle an ASCII character. 
if expected <= '\x7F' { expectedByte := byte(expected) return func(tokenAPI *API) bool { b, err := tokenAPI.Input.Byte.Peek(0) if err == nil && b == expectedByte { tokenAPI.Input.Byte.Accept(b) return true } return false } } // Handle an UTF8 character. return func(tokenAPI *API) bool { r, _, err := tokenAPI.Input.Rune.Peek(0) if err == nil && r == expected { tokenAPI.Input.Rune.Accept(r) return true } return false } } func matchAgainstMultipleChars(expected []rune) Handler { // Check if all characters are ASCII characters. onlyBytes := true expectedBytes := make([]byte, len(expected)) for i, r := range expected { if r > '\x7F' { onlyBytes = false break } expectedBytes[i] = byte(r) } // Handle ASCII characters. if onlyBytes { return func(tokenAPI *API) bool { b, err := tokenAPI.Input.Byte.Peek(0) if err != nil { return false } for _, e := range expectedBytes { if b == e { tokenAPI.Input.Byte.Accept(b) return true } } return false } } // Handle UTF8 characters. return func(tokenAPI *API) bool { r, _, err := tokenAPI.Input.Rune.Peek(0) if err != nil { return false } for _, e := range expected { if r == e { tokenAPI.Input.Rune.Accept(r) return true } } return false } } func MatchCharRange(expected ...rune) Handler { if len(expected) == 0 { callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: at least one character range pair must be provided") } if len(expected)%2 != 0 { callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: an even number of character range pairs must be provided") } starts := make([]rune, len(expected)) ends := make([]rune, len(expected)) for i := 0; i < len(expected); i += 2 { start := expected[i] end := expected[i+1] if start > end { callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: start %q must be <= end %q", start, end) } starts[i/2] = start ends[i/2] = end } if len(expected) == 1 { return matchAgainstSingleCharRange(starts[0], ends[0]) } return 
matchAgainstMultipleCharRanges(starts, ends) } func matchAgainstSingleCharRange(start rune, end rune) Handler { if end <= '\x7F' { start := byte(start) end := byte(end) return func(tokenAPI *API) bool { b, err := tokenAPI.Input.Byte.Peek(0) if err == nil && b >= start && b <= end { tokenAPI.Input.Byte.Accept(b) return true } return false } } return func(tokenAPI *API) bool { r, _, err := tokenAPI.Input.Rune.Peek(0) if err == nil && r >= start && r <= end { tokenAPI.Input.Rune.Accept(r) return true } return false } } func matchAgainstMultipleCharRanges(starts []rune, ends []rune) Handler { // Check if all characters are ASCII characters. onlyBytes := true expectedStarts := make([]byte, len(starts)) // TODO I see one extra with start/end 0/0 in debugging expectedEnds := make([]byte, len(ends)) for i, start := range starts { end := ends[i] if end > '\x7F' { onlyBytes = false break } expectedStarts[i] = byte(start) expectedEnds[i] = byte(end) } if onlyBytes { return func(tokenAPI *API) bool { b, err := tokenAPI.Input.Byte.Peek(0) for i := range expectedStarts { if err == nil && b >= expectedStarts[i] && b <= expectedEnds[i] { tokenAPI.Input.Byte.Accept(b) return true } } return false } } return func(tokenAPI *API) bool { r, _, err := tokenAPI.Input.Rune.Peek(0) for i := range starts { if err == nil && r >= starts[i] && r <= ends[i] { tokenAPI.Input.Rune.Accept(r) return true } } return false } } // MatchNewline creates a handler that matches a newline, which is either // a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n). 
func MatchNewline() Handler { return func(tokenAPI *API) bool { b1, err := tokenAPI.Input.Byte.Peek(0) if err != nil { return false } if b1 == '\n' { tokenAPI.Input.Byte.Accept(b1) return true } if b1 == '\r' { b2, err := tokenAPI.Input.Byte.Peek(1) if err == nil && b2 == '\n' { tokenAPI.Input.Byte.AcceptMulti(b1, b2) return true } } return false } } // MatchBlank creates a Handler that matches one rune from the input // against blank characters, meaning tabs and spaces. // // When you need whitespace matching, which also includes characters like // newlines, then take a look at MatchWhitespace(). func MatchBlank() Handler { return func(tokenAPI *API) bool { b, err := tokenAPI.Input.Byte.Peek(0) if err == nil && (b == ' ' || b == '\t') { tokenAPI.Input.Byte.Accept(b) return true } return false } } // MatchBlanks creates a Handler that matches the input against one // or more blank characters, meaning tabs and spaces. // // When you need whitespace matching, which also includes characters like // newlines, then make use of MatchWhitespace(). // When you need unicode whitespace matching, which also includes characters // like a vertical tab, then make use of MatchUnicodeSpace(). func MatchBlanks() Handler { return func(tokenAPI *API) bool { f := tokenAPI.Input.Byte.AcceptMulti if tokenAPI.Output.suspended > 0 { f = tokenAPI.Input.Byte.MoveCursorMulti } ok := false for { chunk, err := tokenAPI.Input.Byte.PeekBuffered(0) for i, b := range chunk { if b != ' ' && b != '\t' { if i > 0 { f(chunk[:i]...) } return ok } ok = true } if err != nil { if err == io.EOF { if len(chunk) > 0 { f(chunk...) } return ok } return false } f(chunk...) } } } // MatchWhitespace creates a Handler that matches the input against one or more // whitespace characters, defined as space ' ', tab, ' ', newline '\n' (LF) and // carriage return '\r' followed by a newline '\n' (CRLF). 
func MatchWhitespace() Handler { return func(tokenAPI *API) bool { f := tokenAPI.Input.Byte.AcceptMulti if tokenAPI.Output.suspended > 0 { f = tokenAPI.Input.Byte.MoveCursorMulti } ok := false for { chunk, err := tokenAPI.Input.Byte.PeekBuffered(0) for i, b := range chunk { if b != ' ' && b != '\t' && b != '\n' && b != '\r' { if i > 0 { f(chunk[:i]...) } return ok } ok = true } if err != nil { if err == io.EOF { if len(chunk) > 0 { f(chunk...) } return ok } return false } f(chunk...) } } } // MatchUnicodeSpace creates a Handler that matches the input against one or more // whitespace characters, as defined by unicode. func MatchUnicodeSpace() Handler { return MatchOneOrMore(MatchRuneByCallback(unicode.IsSpace)) } // MatchByteByCallback creates a Handler that matches a single byte from the // input against the provided callback function. When the callback returns true, // it is considered a match. func MatchByteByCallback(callback func(byte) bool) Handler { return func(tokenAPI *API) bool { b, err := tokenAPI.Input.Byte.Peek(0) if err == nil && callback(b) { tokenAPI.Input.Byte.Accept(b) return true } return false } } // MatchBytesByCallback creates a Handler that matches one or more bytes from the // input against the provided callback function. As long as the callback returns true, // it is considered a match. func MatchBytesByCallback(callback func(byte) bool) Handler { return func(tokenAPI *API) bool { f := tokenAPI.Input.Byte.AcceptMulti if tokenAPI.Output.suspended > 0 { f = tokenAPI.Input.Byte.MoveCursorMulti } ok := false for { chunk, err := tokenAPI.Input.Byte.PeekBuffered(0) for i, b := range chunk { if !callback(b) { if i > 0 { f(chunk[:i]...) } return ok } ok = true } if err != nil { if err == io.EOF { if len(chunk) > 0 { f(chunk...) } return ok } return false } f(chunk...) } } } // MatchRuneByCallback creates a Handler that matches a single rune from the // input against the provided callback function. 
When the callback returns true, // it is considered a match. // // Note that the callback function matches the signature of the unicode.Is* functions, // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower). func MatchRuneByCallback(callback func(rune) bool) Handler { return func(tokenAPI *API) bool { r, _, err := tokenAPI.Input.Rune.Peek(0) if err == nil && callback(r) { tokenAPI.Input.Rune.Accept(r) return true } return false } } // MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF. func MatchEndOfLine() Handler { return func(tokenAPI *API) bool { b1, err := tokenAPI.Input.Byte.Peek(0) if err != nil { return err == io.EOF } if b1 == '\n' { tokenAPI.Input.Byte.Accept(b1) return true } if b1 == '\r' { b2, _ := tokenAPI.Input.Byte.Peek(1) if b2 == '\n' { tokenAPI.Input.Byte.AcceptMulti(b1, b2) return true } } return false } } // MatchStr creates a Handler that matches the input against the provided string. func MatchStr(expected string) Handler { expectedBytes := []byte(expected) expectedLength := len(expectedBytes) return func(tokenAPI *API) bool { b, err := tokenAPI.Input.Byte.PeekMulti(0, expectedLength) if err != nil || len(b) < expectedLength { return false } for i, bExpected := range expectedBytes { if b[i] != bExpected { return false } } tokenAPI.Input.Byte.AcceptMulti(expectedBytes...) return true } } // MatchStrNoCase creates a Handler that matches the input against the // provided string in a case-insensitive manner. 
func MatchStrNoCase(expected string) Handler { l := utf8.RuneCountInString(expected) return func(tokenAPI *API) bool { matches := make([]rune, l) offset := 0 i := 0 for _, e := range expected { if e <= '\x7F' { b, err := tokenAPI.Input.Byte.Peek(offset) if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) { return false } matches[i] = rune(b) offset++ } else { r, w, err := tokenAPI.Input.Rune.Peek(offset) if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) { return false } matches[i] = r offset += w } i++ } tokenAPI.Input.Rune.AcceptMulti(matches...) return true } } // MatchOptional creates a Handler that makes the provided Handler optional. // When the provided Handler applies, then its output is used, otherwise // no output is generated but still a successful match is reported (but the // result will be empty). func MatchOptional(handler Handler) Handler { return func(tokenAPI *API) bool { snap := tokenAPI.MakeSnapshot() if !handler(tokenAPI) { tokenAPI.RestoreSnapshot(snap) } return true } } // MatchSeq creates a Handler that checks if the provided Handlers can be // applied in their exact order. Only if all Handlers apply, the sequence // reports successful match. func MatchSeq(handlers ...Handler) Handler { return func(tokenAPI *API) bool { snap := tokenAPI.MakeSnapshot() for _, handler := range handlers { split := tokenAPI.SplitOutput() if !handler(tokenAPI) { tokenAPI.RestoreSnapshot(snap) return false } tokenAPI.MergeSplitOutput(split) } return true } } // MatchAny creates a Handler that checks if any of the provided Handlers // can be applied. They are applied in their provided order. The first Handler // that applies is used for reporting back a match. 
func MatchAny(handlers ...Handler) Handler { return func(tokenAPI *API) bool { snap := tokenAPI.MakeSnapshot() for _, handler := range handlers { if handler(tokenAPI) { return true } tokenAPI.RestoreSnapshot(snap) } return false } } // MatchNot creates a Handler that checks if the provided Handler applies to // the current input. If it does, then a failed match will be reported. If it // does not, then the next rune from the input will be reported as a match. func MatchNot(handler Handler) Handler { return func(tokenAPI *API) bool { snap := tokenAPI.MakeSnapshot() if handler(tokenAPI) { tokenAPI.RestoreSnapshot(snap) return false } r, _, err := tokenAPI.Input.Rune.Peek(0) if err == nil { tokenAPI.Input.Rune.Accept(r) return true } return false } } // MatchRep creates a Handler that checks if the provided Handler can be // applied exactly the provided amount of times. // // Note that the input can contain more than the provided number of matches, e.g.: // // MatchRep(4, MatchChar('X')) // // will not match input "XXX", it will match input "XXXX", but also "XXXXXX". // In that last case, there will be a remainder "XX" on the input. // // Another way to use this method, is by applying the following syntactic sugar: // // MatchChar('X').Times(4) func MatchRep(times int, handler Handler) Handler { return matchMinMax(times, times, handler, "MatchRep") } // MatchMin creates a Handler that checks if the provided Handler can be // applied at least the provided minimum number of times. // When more matches are possible, these will be included in the output. func MatchMin(min int, handler Handler) Handler { if min < 0 { callerPanic("MatchMin", "Handler: {name} definition error at {caller}: min must be >= 0") } return matchMinMax(min, -1, handler, "MatchMin") } // MatchMax creates a Handler that checks if the provided Handler can be // applied at maximum the provided minimum number of times. // When more matches are possible, thhandler(ese will be included in the output. 
// Zero matches are considered a successful match. func MatchMax(max int, handler Handler) Handler { if max < 0 { callerPanic("MatchMax", "Handler: {name} definition error at {caller}: max must be >= 0") } return matchMinMax(0, max, handler, "MatchMax") } // MatchZeroOrMore creates a Handler that checks if the provided Handler can // be applied zero or more times. All matches will be included in the output. // Zero matches are considered a successful match. func MatchZeroOrMore(handler Handler) Handler { return matchMinMax(0, -1, handler, "MatchZeroOfMore") } // MatchOneOrMore creates a Handler that checks if the provided Handler can // be applied one or more times. All matches will be included in the output. func MatchOneOrMore(handler Handler) Handler { return matchMinMax(1, -1, handler, "MatchOneOrMore") } // MatchMinMax creates a Handler that checks if the provided Handler can // be applied between the provided minimum and maximum number of times, // inclusive. All matches will be included in the output. func MatchMinMax(min int, max int, handler Handler) Handler { if max < 0 { callerPanic("MatchMinMax", "Handler: {name} definition error at {caller}: max must be >= 0") } if min < 0 { callerPanic("MatchMinMax", "Handler: {name} definition error at {caller}: min must be >= 0") } return matchMinMax(min, max, handler, "MatchMinMax") } func matchMinMax(min int, max int, handler Handler, name string) Handler { if max >= 0 && min > max { callerPanic(name, "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min) } return func(tokenAPI *API) bool { total := 0 // Check for the minimum required amount of matches. snap := tokenAPI.MakeSnapshot() for total < min { total++ split := tokenAPI.SplitOutput() ok := handler(tokenAPI) tokenAPI.MergeSplitOutput(split) if !ok { tokenAPI.RestoreSnapshot(snap) return false } } // No specified max: include the rest of the available matches. 
// Specified max: include the rest of the availble matches, up to the max. //child.Merge() for max < 0 || total < max { total++ split := tokenAPI.SplitOutput() ok := handler(tokenAPI) tokenAPI.MergeSplitOutput(split) if !ok { break } } return true } } // MatchSeparated creates a Handler that checks for a pattern of one or more // Handlers of one type (the separated), separated by Handler of another type // (the separator). All matches (separated + separator) are included in the // output. func MatchSeparated(separator Handler, separated Handler) Handler { return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated))) } // MatchExcept creates a Handler that checks if the provided Handler can be // applied to the upcoming input. It also checks if the except Handler can be // applied. If the handler applies, but the except Handler as well, then the match // as a whole will be treated as a mismatch. func MatchExcept(handler Handler, except Handler) Handler { return func(tokenAPI *API) bool { snap := tokenAPI.MakeSnapshot() if except(tokenAPI) { tokenAPI.RestoreSnapshot(snap) return false } return handler(tokenAPI) } } // MatchFollowedBy creates a Handler that checks if the provided handler matches // and if the provided lookAhead handler matches after the handler. // When both handlers match, the match for the handler is accepted and the match // for the lookAhead handler is ignored. func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { return func(tokenAPI *API) bool { if handler(tokenAPI) { snap := tokenAPI.MakeSnapshot() ok := lookAhead(tokenAPI) tokenAPI.RestoreSnapshot(snap) return ok } return false } } // MatchNotFollowedBy creates a Handler that checks if the provided handler matches // and if the provided lookAhead handler does not match after the handler. // If the handler matches and the lookAhead handler doesn't, then the match for // the handler is accepted. 
func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler { return func(tokenAPI *API) bool { if handler(tokenAPI) { snap := tokenAPI.MakeSnapshot() ok := !lookAhead(tokenAPI) tokenAPI.RestoreSnapshot(snap) return ok } return false } } func MatchInOptionalBlanks(handler Handler) Handler { blanks := MatchBlanks() return func(tokenAPI *API) bool { tokenAPI.Output.Suspend() blanks(tokenAPI) tokenAPI.Output.Resume() if !handler(tokenAPI) { return false } tokenAPI.Output.Suspend() blanks(tokenAPI) tokenAPI.Output.Resume() return true } } // MakeInputFlusher creates a Handler that will flush the input buffer when the // provided handler matches. // // This is useful when constructing a grammar using only parsekit.tokenize // functionality (parsekit.parse will automatically flush the input for you) // that has to process large input data. // // Without flushing the input, the input reader will allocate memory // during the parsing process, eventually enough to hold the full input // in memory. By wrapping Handlers with an input flusher, you can tell parsekit // that the accumulated input so far will no longer be needed, allowing // this input to be flushed from memory. // // Rule of thumb is: only use it when you have to actually fix a memory // hogging issue for your use case. func MakeInputFlusher(handler Handler) Handler { return func(tokenAPI *API) bool { if handler(tokenAPI) { tokenAPI.Input.Flush() return true } return false } } // MatchSigned creates a Handler that checks if the provided Handler is // prefixed by an optional '+' or '-' sign. This can be used to turn numeric // atoms into a signed version, e.g. 
// // C.Signed(A.Integer) func MatchSigned(handler Handler) Handler { return func(tokenAPI *API) bool { b, err := tokenAPI.Input.Byte.Peek(0) if err != nil { return false } snap := tokenAPI.MakeSnapshot() if b == '-' || b == '+' { tokenAPI.Input.Byte.Accept(b) } if handler(tokenAPI) { return true } tokenAPI.RestoreSnapshot(snap) return false } } // MatchIntegerBetween creates a Handler that checks for an integer // value between the provided min and max boundaries (inclusive). // It uses an int64 for checking internally, so you can check values // ranging from -9223372036854775808 to 9223372036854775807. func MatchIntegerBetween(min int64, max int64) Handler { if max < min { callerPanic("MatchIntegerBetween", "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min) } digits := MatchSigned(MatchDigits()) return func(tokenAPI *API) bool { if !digits(tokenAPI) { return false } value, _ := strconv.ParseInt(tokenAPI.Output.String(), 10, 64) if value < min || value > max { return false } return true } } // MatchEndOfFile creates a Handler that checks if the end of the input data // has been reached. This Handler will never produce output. It only reports // a successful or a failing match through its boolean return value. func MatchEndOfFile() Handler { return func(tokenAPI *API) bool { _, err := tokenAPI.Input.Byte.Peek(0) return err == io.EOF } } // MatchUntilEndOfLine creates a Handler function that accepts one or // more runes until the end of the line (or file when that's the case). // The newline itself is not included in the match. func MatchUntilEndOfLine() Handler { return func(tokenAPI *API) bool { f := tokenAPI.Input.Byte.AcceptMulti if tokenAPI.Output.suspended > 0 { f = tokenAPI.Input.Byte.MoveCursorMulti } state := 0 ok := false for { chunk, err := tokenAPI.Input.Byte.PeekBuffered(0) for i, b := range chunk { if b == '\r' { state = 1 continue } if b == '\n' { if i+state > 0 { f(chunk[:i+state]...) 
} return ok } state = 0 ok = true } if err != nil { if err == io.EOF { if len(chunk) > 0 { f(chunk...) } return ok } return false } f(chunk...) } } }

// MatchAnyByte creates a Handler function that accepts any byte from the input.
func MatchAnyByte() Handler {
	return func(tokenAPI *API) bool {
		b, err := tokenAPI.Input.Byte.Peek(0)
		if err != nil {
			return false
		}
		tokenAPI.Input.Byte.Accept(b)
		return true
	}
}

// MatchAnyRune creates a Handler function that checks if a rune can be
// read from the input. Invalid runes on the input are replaced with the UTF8
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as �.
func MatchAnyRune() Handler {
	return func(tokenAPI *API) bool {
		r, _, err := tokenAPI.Input.Rune.Peek(0)
		if err != nil {
			return false
		}
		tokenAPI.Input.Rune.Accept(r)
		return true
	}
}

// MatchValidRune creates a Handler function that checks if a valid
// UTF8 rune can be read from the input.
func MatchValidRune() Handler {
	return func(tokenAPI *API) bool {
		r, _, err := tokenAPI.Input.Rune.Peek(0)
		if err != nil || r == utf8.RuneError {
			return false
		}
		tokenAPI.Input.Rune.Accept(r)
		return true
	}
}

// MatchValidRunes creates a Handler function that accepts a run of
// consecutive valid UTF8 runes from the input. The input is peeked and decoded
// in blocks of 128 bytes. A match is reported when at least one valid rune
// was decoded; a read error other than io.EOF results in a mismatch.
func MatchValidRunes() Handler {
	blockSize := 128
	return func(tokenAPI *API) bool {
		runes := make([]rune, blockSize)
		matched := false
		for {
			chunk, err := tokenAPI.Input.Byte.PeekMulti(0, blockSize)
			count := 0
			pos := 0
			last := len(chunk) - 1
			for pos <= last {
				r, width := utf8.DecodeRune(chunk[pos:])
				if r == utf8.RuneError {
					if count > 0 {
						// We might be looking at a partial UTF8 rune at the end of the chunk.
						// Don't stop decoding here, but instead go into the next chunk.
						// If we're actually looking at an invalid rune here, the next chunk
						// will hit it with count == 0 and the read process will stop.
						if pos > last-utf8.UTFMax+1 {
							break
						}
						tokenAPI.Input.Rune.AcceptMulti(runes[:count]...)
					}
					return matched
				}
				matched = true
				pos += width
				runes[count] = r
				count++
			}
			if count > 0 {
				tokenAPI.Input.Rune.AcceptMulti(runes[:count]...)
			}
			if err != nil {
				if err == io.EOF {
					return matched
				}
				return false
			}
		}
	}
}

// MatchInvalidRune creates a Handler function that checks if an invalid
// UTF8 rune can be read from the input.
func MatchInvalidRune() Handler {
	return func(tokenAPI *API) bool {
		r, _, err := tokenAPI.Input.Rune.Peek(0)
		if err != nil || r != utf8.RuneError {
			return false
		}
		tokenAPI.Input.Rune.Accept(r)
		return true
	}
}

// MatchDigit creates a Handler that checks if a single digit can be read
// from the input.
func MatchDigit() Handler {
	return MatchCharRange('0', '9')
}

// MatchDigits creates a Handler that checks if one or more digits can be read
// from the input.
func MatchDigits() Handler {
	return func(tokenAPI *API) bool {
		// Accept bytes for as long as they are digits. The result is a match
		// only when at least one digit was accepted.
		matched := false
		for {
			b, err := tokenAPI.Input.Byte.Peek(0)
			if err != nil || b < '0' || b > '9' {
				return matched
			}
			tokenAPI.Input.Byte.Accept(b)
			matched = true
		}
	}
}

// MatchDigitNotZero creates a Handler that checks if a single digit not equal
// to zero '0' can be read from the input.
func MatchDigitNotZero() Handler {
	return MatchCharRange('1', '9')
}

// MatchInteger creates a Handler function that checks if a valid integer
// can be read from the input.
//
// Leading zeroes are allowed. When the normalize parameter is true, these
// will be stripped from the input.
func MatchInteger(normalize bool) Handler {
	return func(tokenAPI *API) bool {
		// The input must start with a digit.
		first, err := tokenAPI.Input.Byte.Peek(0)
		if err != nil || first < '0' || first > '9' {
			return false
		}

		// When normalization is requested, drop leading zeroes.
		if normalize && first == '0' {
			for {
				next, err := tokenAPI.Input.Byte.Peek(1)

				// The next character is a zero, skip the leading zero and check again.
				if err == nil && next == '0' {
					tokenAPI.Input.Byte.MoveCursor('0')
					continue
				}
				// The next character is not a zero, nor a digit at all.
				// We're looking at a zero on its own here.
				if err != nil || next < '1' || next > '9' {
					tokenAPI.Input.Byte.Accept('0')
					return true
				}
				// The next character is a digit. Skip the leading zero and go with the digit.
				tokenAPI.Input.Byte.MoveCursor('0')
				tokenAPI.Input.Byte.Accept(next)
				break
			}
		}

		// Continue accepting bytes as long as they are digits.
		for {
			b, err := tokenAPI.Input.Byte.Peek(0)
			if err != nil || b < '0' || b > '9' {
				return true
			}
			tokenAPI.Input.Byte.Accept(b)
		}
	}
}

// MatchDecimal creates a Handler function that checks if a valid decimal value
// can be read from the input. In case the fractional part is missing (which is
// a valid decimal number), this Handler will report a match, so both "123" and
// "123.123" will match.
//
// Leading zeroes are allowed. When the normalize parameter is true, these
// will be stripped from the input.
func MatchDecimal(normalize bool) Handler {
	return func(tokenAPI *API) bool {
		// The input must start with a digit.
		cur, err := tokenAPI.Input.Byte.Peek(0)
		if err != nil || cur < '0' || cur > '9' {
			return false
		}

		// When normalization is requested, drop leading zeroes.
		if normalize && cur == '0' {
			for {
				next, err := tokenAPI.Input.Byte.Peek(1)

				// The next character is a zero, skip the leading zero and check again.
				if err == nil && next == '0' {
					tokenAPI.Input.Byte.MoveCursor('0')
					continue
				}
				// The next character is a dot, go with the zero before the dot and
				// let the upcoming code handle the dot.
				if err == nil && next == '.' {
					tokenAPI.Input.Byte.Accept('0')
					break
				}
				// The next character is not a zero, nor a digit at all.
				// We're looking at a zero on its own here.
				if err != nil || next < '1' || next > '9' {
					tokenAPI.Input.Byte.Accept('0')
					return true
				}
				// The next character is a digit. Skip the leading zero and go with the digit.
				tokenAPI.Input.Byte.MoveCursor('0')
				tokenAPI.Input.Byte.Accept(next)
				break
			}
		}

		// Continue accepting bytes as long as they are digits. The byte that
		// ends this loop is kept in cur, because it is inspected below.
		for {
			cur, err = tokenAPI.Input.Byte.Peek(0)
			if err != nil || cur < '0' || cur > '9' {
				break
			}
			tokenAPI.Input.Byte.Accept(cur)
		}

		// No dot or no digit after a dot? Then we're done.
		if cur != '.' {
			return true
		}
		cur, err = tokenAPI.Input.Byte.Peek(1)
		if err != nil || cur < '0' || cur > '9' {
			return true
		}

		// Accept the dot and the first fractional digit, then continue
		// accepting bytes as long as they are digits.
		tokenAPI.Input.Byte.AcceptMulti('.', cur)
		for {
			cur, err = tokenAPI.Input.Byte.Peek(0)
			if err != nil || cur < '0' || cur > '9' {
				return true
			}
			tokenAPI.Input.Byte.Accept(cur)
		}
	}
}

// MatchBoolean creates a Handler function that checks if a boolean
// value can be read from the input. It supports the boolean values as understood
// by Go's strconv.ParseBool() function.
//
// True values: true, TRUE, True, 1, t, T
//
// False values: false, FALSE, False, 0, f, F
func MatchBoolean() Handler {
	// The full words are listed before the single characters, so "true" is
	// tried before "t" (and "false" before "f").
	return MatchAny(
		MatchStr("true"),
		MatchStr("TRUE"),
		MatchStr("True"),
		MatchChar('t'),
		MatchChar('T'),
		MatchChar('1'),
		MatchStr("false"),
		MatchStr("FALSE"),
		MatchStr("False"),
		MatchChar('f'),
		MatchChar('F'),
		MatchChar('0'),
	)
}

// MatchASCII creates a Handler function that matches against any
// ASCII value on the input.
func MatchASCII() Handler {
	return MatchCharRange('\x00', '\x7F')
}

// MatchASCIILower creates a Handler function that matches against any
// lower case ASCII letter on the input (a - z).
func MatchASCIILower() Handler {
	return MatchCharRange('a', 'z')
}

// MatchASCIIUpper creates a Handler function that matches against any
// upper case ASCII letter on the input (A - Z).
func MatchASCIIUpper() Handler {
	return MatchCharRange('A', 'Z')
}

// MatchUnicodeLetter creates a Handler function that matches against any
// unicode letter on the input (see unicode.IsLetter(rune)).
func MatchUnicodeLetter() Handler {
	return MatchRuneByCallback(unicode.IsLetter)
}

// MatchUnicodeUpper creates a Handler function that matches against any
// upper case unicode letter on the input (see unicode.IsUpper(rune)).
func MatchUnicodeUpper() Handler {
	return MatchRuneByCallback(unicode.IsUpper)
}

// MatchUnicodeLower creates a Handler function that matches against any
// lower case unicode letter on the input (see unicode.IsLower(rune)).
func MatchUnicodeLower() Handler {
	return MatchRuneByCallback(unicode.IsLower)
}

// MatchHexDigit creates a Handler function that checks if a single hexadecimal
// digit can be read from the input.
func MatchHexDigit() Handler {
	return MatchCharRange('0', '9', 'a', 'f', 'A', 'F')
}

// MatchOctet creates a Handler function that checks if a valid octet value
// can be read from the input (octet = byte value representation, with a value
// between 0 and 255 inclusive). It only looks at the first 1 to 3 upcoming
// digits, not if there's a non-digit after it, meaning that "123255" would be
// a valid sequence of two octets.
//
// No match is reported when the upcoming input does not start with a digit,
// which includes the case where no input bytes are available at all.
//
// When the normalize parameter is set to true, then leading zeroes will be
// stripped from the octet.
func MatchOctet(normalize bool) Handler {
	return func(tokenAPI *API) bool {
		// The peek error is ignored on purpose: whatever bytes could be read
		// are inspected, and the end == 0 check below rejects the case where
		// there were none.
		chunk, _ := tokenAPI.Input.Byte.PeekMulti(0, 3)

		value := 0 // numeric value of the digits seen so far
		start := 0 // number of leading zeroes
		end := 0   // number of digits
		for _, b := range chunk {
			if b < '0' || b > '9' {
				break
			}
			if b == '0' && value == 0 {
				start++
			} else {
				value = value*10 + int(b-'0')
			}
			end++
		}

		// Without any digit there is no octet. Checking this explicitly also
		// prevents an empty match, and (when normalizing) slicing the chunk
		// with a negative start index, when the chunk is empty.
		if end == 0 || value > 255 {
			return false
		}

		if !normalize {
			tokenAPI.Input.Byte.AcceptMulti(chunk[0:end]...)
			return true
		}

		// The octet consists of only zeroes: keep the last one as the output.
		if value == 0 {
			start--
		}
		// Skip the leading zeroes without adding them to the output.
		if start > 0 {
			tokenAPI.Input.Byte.MoveCursorMulti(chunk[0:start]...)
		}
		tokenAPI.Input.Byte.AcceptMulti(chunk[start:end]...)
		return true
	}
}

// MatchIPv4 creates a Handler function that checks if a valid IPv4
// IP address value can be read from the input.
// // When the normalize parameter is true, IP-addresses that look like // "192.168.001.012" will be normalize to "192.168.1.12". func MatchIPv4(normalize bool) Handler { octet := MatchOctet(normalize) dot := MatchChar('.') return MatchSeq(octet, dot, octet, dot, octet, dot, octet) } // MatchIPv4CIDRMask creates a Handler function that checks if a // valid IPv4 CIDR mask (0 - 32) value can be read from the input. func MatchIPv4CIDRMask(normalize bool) Handler { return matchCIDRMask(32, normalize) } // MatchIPv4Netmask creates a Handler function that checks if a valid // IPv4 netmask can be read from input (e.g. 255.255.255.0). // Only a netmask in canonical form is accepted (meaning that in binary form // it start with zero or more 1-bits, followed by only 0-bits up to the // 32 bit length). // // When the normalize parameter is true, netmasks that look like // "255.255.192.000" will be normalized to "255.255.192.0". func MatchIPv4Netmask(normalize bool) Handler { octet := MakeUint8Token(nil, MatchOctet(normalize)) dot := MatchChar('.') netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet) return func(tokenAPI *API) bool { if !netmask(tokenAPI) { return false } // Check if the mask is provided in canonical form (at the binary level, ones followed by zeroes). val := tokenAPI.Output.TokenValue mask := net.IPv4Mask(val(0).(byte), val(1).(byte), val(2).(byte), val(3).(byte)) ones, bits := mask.Size() if ones == 0 && bits == 0 { return false } tokenAPI.Output.ClearTokens() return true } } // MatchIPv4Net creates a Handler function that checks the input for an // IPv4 + mask input. Both / (e.g. 192.168.0.1/24) and / // (e.g. 172.16.10.254/255.255.192.0) are acceptable. // // When the normalize parameter is true, then the IP address and the mask are // normalized. The mask will be normalized to cidr, so the above example would // be normalized to 172.16.10.254/18. 
func MatchIPv4Net(normalize bool) Handler { ip := MakeStrLiteralToken("ip", MatchIPv4(normalize)) slash := MatchChar('/') mask := MatchAny( MakeStrLiteralToken("mask", MatchIPv4Netmask(normalize)), MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize))) ipnet := MatchSeq(ip, slash, mask) return func(tokenAPI *API) bool { if !ipnet(tokenAPI) { return false } if !normalize { return true } maskToken := tokenAPI.Output.Token(1) val := tokenAPI.Output.TokenValue if maskToken.Type == "cidr" { tokenAPI.Output.SetString(fmt.Sprintf("%s/%d", val(0), val(1).(uint8))) } else { o := strings.Split(val(1).(string), ".") b := func(idx int) byte { i, _ := strconv.Atoi(o[idx]); return byte(i) } mask := net.IPv4Mask(b(0), b(1), b(2), b(3)) bits, _ := mask.Size() tokenAPI.Output.SetString(fmt.Sprintf("%s/%d", val(0), bits)) } tokenAPI.Output.ClearTokens() return true } } // MatchIPv6 creates a Handler function that checks if an IPv6 address // can be read from the input. func MatchIPv6(normalize bool) Handler { hextet := MatchMinMax(1, 4, MatchHexDigit()) colon := MatchChar(':') empty := MatchSeq(colon, colon) return func(tokenAPI *API) bool { nrOfHextets := 0 for nrOfHextets < 8 { if hextet(tokenAPI) { nrOfHextets++ } else if empty(tokenAPI) { nrOfHextets += 2 } else if !colon(tokenAPI) { break } } // No hextets or too many hextets (e.g. 1:1:1:1:1:1:1:: <-- since :: is 2 or more hextets). if nrOfHextets == 0 || nrOfHextets > 8 { return false } // Invalid IPv6, when net.ParseIP() cannot handle it. input := tokenAPI.Output.String() parsed := net.ParseIP(input) if parsed == nil { return false } if normalize { tokenAPI.Output.SetString(parsed.String()) } return true } } // MatchIPv6CIDRMask creates a Handler function that checks if a // valid IPv6 CIDR mask (0 - 128) value can be read from the input. 
func MatchIPv6CIDRMask(normalize bool) Handler { return matchCIDRMask(128, normalize) } func matchCIDRMask(bits int64, normalize bool) Handler { mask := MatchIntegerBetween(0, bits) if !normalize { return mask } return func(tokenAPI *API) bool { if !mask(tokenAPI) { return false } maskStr := tokenAPI.Output.String() bits, _ := strconv.Atoi(maskStr) tokenAPI.Output.SetString(fmt.Sprintf("%d", bits)) return true } } // MatchIPv6Net creates a Handler function that checks the input for an // IPv6 + mask input, e.g. fe80:0:0:0:0216:3eff:fe96:0002/64. // // When the normalize parameter is true, then the IP address and the mask are // normalized. The above example would be normalized to fe08::216:3eff:fe96:2/64. func MatchIPv6Net(normalize bool) Handler { ip := MatchIPv6(normalize) slash := MatchChar('/') mask := MatchIPv6CIDRMask(normalize) return MatchSeq(ip, slash, mask) } // ModifyDrop creates a Handler that checks if the provided Handler applies. // If it does, then its output is disposed completely. // // Note that if the Handler does not apply, a mismatch will be reported back, // even though we would have dropped the output anyway. So if you would like // to drop optional blanks (spaces and tabs), then use something like: // // M.Drop(C.Optional(A.Blanks)) // // instead of: // // M.Drop(A.Blanks) // // Since A.Blanks is defined as "1 or more spaces and/or tabs", the input // string "bork" would not match against the second form, but " bork" would. // In both cases, it would match the first form. func ModifyDrop(handler Handler) Handler { return func(tokenAPI *API) bool { tokenAPI.Output.Suspend() ok := handler(tokenAPI) tokenAPI.Output.Resume() return ok } } // ModifyTrim creates a Handler that checks if the provided Handler applies. // If it does, then its output is taken and characters from the provided // cutset are trimmed from both the left and the right of the output. 
func ModifyTrim(handler Handler, cutset string) Handler {
	return modifyTrim(handler, cutset, true, true)
}

// ModifyTrimLeft creates a Handler that checks if the provided Handler applies.
// If it does, then its output is taken and characters from the provided
// cutset are trimmed from the left of the output.
func ModifyTrimLeft(handler Handler, cutset string) Handler {
	return modifyTrim(handler, cutset, true, false)
}

// ModifyTrimRight creates a Handler that checks if the provided Handler applies.
// If it does, then its output is taken and characters from the provided
// cutset are trimmed from the right of the output.
func ModifyTrimRight(handler Handler, cutset string) Handler {
	return modifyTrim(handler, cutset, false, true)
}

// modifyTrim is the shared implementation for the trimming modifiers. The
// trimLeft and trimRight parameters select the side(s) of the output from
// which the characters in cutset are removed.
func modifyTrim(handler Handler, cutset string, trimLeft bool, trimRight bool) Handler {
	return ModifyByCallback(handler, func(s string) string {
		switch {
		case trimLeft && trimRight:
			return strings.Trim(s, cutset)
		case trimLeft:
			return strings.TrimLeft(s, cutset)
		case trimRight:
			return strings.TrimRight(s, cutset)
		default:
			return s
		}
	})
}

// ModifyTrimSpace creates a Handler that checks if the provided Handler applies.
// If it does, then its output is taken and all leading and trailing whitespace characters,
// as defined by Unicode are removed from it.
func ModifyTrimSpace(handler Handler) Handler {
	return ModifyByCallback(handler, strings.TrimSpace)
}

// ModifyToUpper creates a Handler that checks if the provided Handler applies.
// If it does, then its output is taken and converted into upper case.
func ModifyToUpper(handler Handler) Handler {
	return ModifyByCallback(handler, strings.ToUpper)
}

// ModifyToLower creates a Handler that checks if the provided Handler applies.
// If it does, then its output is taken and converted into lower case.
func ModifyToLower(handler Handler) Handler { return ModifyByCallback(handler, strings.ToLower) } // ModifyReplace creates a Handler that checks if the provided Handler applies. // If it does, then its output is replaced by the provided string. func ModifyReplace(handler Handler, replaceWith string) Handler { return ModifyByCallback(handler, func(string) string { return replaceWith }) } // ModifyByCallback creates a Handler that checks if the provided Handler applies. // If it does, then its output is taken and it is fed to the provided modfunc. // This is a simple function that takes a string on input and returns a possibly // modified string on output. The return value of the modfunc will replace the // resulting output. func ModifyByCallback(handler Handler, modfunc func(string) string) Handler { return func(tokenAPI *API) bool { snap := tokenAPI.MakeSnapshot() split := tokenAPI.SplitOutput() if handler(tokenAPI) { origS := tokenAPI.Output.String() s := modfunc(origS) if s != origS { tokenAPI.Output.SetString(s) } tokenAPI.MergeSplitOutput(split) return true } tokenAPI.RestoreSnapshot(snap) return false } } // MakeStrLiteralToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to a string-typed // representation of the read Runes. This string is literal, meaning that an // escape sequence like "\n" is kept as-is (a backslash character, followed by // an 'n'-character). func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler { return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { literal := tokenAPI.Output.String() return literal }) } // MakeStrInterpretedToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to a string-typed // representation of the read Runes. 
This string is interpreted, meaning that an // escape sequence like "\n" is translated to an actual newline control character func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler { return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { // TODO ERROR HANDLING interpreted, _ := interpretString(tokenAPI.Output.String()) return interpreted }) } // TODO I think here I can win some speed by using the methods from, I think, the parse2 solution. func interpretString(str string) (string, error) { var sb strings.Builder for len(str) > 0 { r, _, remainder, err := strconv.UnquoteChar(str, '"') if err != nil { return sb.String(), err } str = remainder sb.WriteRune(r) } return sb.String(), nil } // MakeRuneToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to a Rune-representation // of the read Rune. func MakeRuneToken(toktype interface{}, handler Handler) Handler { return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { // TODO ERROR HANDLING --- not a 1 rune input return tokenAPI.Output.Rune(0) }) } // MakeByteToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to a Byte-representation // of the read Rune. func MakeByteToken(toktype interface{}, handler Handler) Handler { return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { // TODO ERROR HANDLING --- not a 1 byte input return byte(tokenAPI.Output.Rune(0)) }) } // MakeIntToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an int-representation // of the read Rune. func MakeIntToken(toktype interface{}, handler Handler) Handler { return makeStrconvToken("int", toktype, handler, func(s string) (interface{}, error) { return strconv.Atoi(s) }) } // MakeInt8Token creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an int8-representation // of the read Rune. 
// TODO allow other Go types for oct and hex too. func MakeInt8Token(toktype interface{}, handler Handler) Handler { return makeStrconvToken("int8", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseInt(s, 10, 8) if err == nil { return int8(value), err } return value, err }) } // MakeInt16Token creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an int16-representation // of the read Rune. func MakeInt16Token(toktype interface{}, handler Handler) Handler { return makeStrconvToken("int16", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseInt(s, 10, 16) if err == nil { return int16(value), err } return value, err }) } // MakeInt32Token creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an int32-representation // of the read Rune. func MakeInt32Token(toktype interface{}, handler Handler) Handler { return makeStrconvToken("int32", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseInt(s, 10, 32) if err == nil { return int32(value), err } return value, err }) } // MakeInt64BaseToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an int64-representation // of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal, // 10 = decimal, 16 = hexadecimal). func MakeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler { return makeInt64BaseToken(toktype, base, handler) } func makeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler { return makeStrconvToken("int64", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseInt(s, base, 64) if err == nil { return int64(value), err } return value, err }) } // MakeInt64Token creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an int64-representation // of the read Rune. 
func MakeInt64Token(toktype interface{}, handler Handler) Handler {
	return MakeInt64BaseToken(toktype, 10, handler)
}

// MakeUintToken creates a Handler that will add a Token to the
// Result, for which the Token.Value is set to an uint-representation
// of the read Rune.
func MakeUintToken(toktype interface{}, handler Handler) Handler {
	return makeStrconvToken("uint", toktype, handler,
		func(s string) (interface{}, error) {
			// Bit size 0 makes ParseUint use the size of the uint type.
			parsed, err := strconv.ParseUint(s, 10, 0)
			if err != nil {
				return parsed, err
			}
			return uint(parsed), nil
		})
}

// MakeUint8Token creates a Handler that will add a Token to the
// Result, for which the Token.Value is set to an uint8-representation
// of the read Rune.
// TODO allow other Go types for oct and hex too.
func MakeUint8Token(toktype interface{}, handler Handler) Handler {
	return makeStrconvToken("uint8", toktype, handler,
		func(s string) (interface{}, error) {
			parsed, err := strconv.ParseUint(s, 10, 8)
			if err != nil {
				return parsed, err
			}
			return uint8(parsed), nil
		})
}

// MakeUint16Token creates a Handler that will add a Token to the
// Result, for which the Token.Value is set to an uint16-representation
// of the read Rune.
func MakeUint16Token(toktype interface{}, handler Handler) Handler {
	return makeStrconvToken("uint16", toktype, handler,
		func(s string) (interface{}, error) {
			parsed, err := strconv.ParseUint(s, 10, 16)
			if err != nil {
				return parsed, err
			}
			return uint16(parsed), nil
		})
}

// MakeUint32Token creates a Handler that will add a Token to the
// Result, for which the Token.Value is set to an uint32-representation
// of the read Rune.
func MakeUint32Token(toktype interface{}, handler Handler) Handler { return makeStrconvToken("unit32", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseUint(s, 10, 32) if err == nil { return uint32(value), err } return value, err }) } // MakeUint64BaseToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an uint64-representation // of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal, // 10 = decimal, 16 = hexadecimal). func MakeUint64BaseToken(toktype interface{}, base int, handler Handler) Handler { return makeStrconvToken("uint64", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseUint(s, base, 64) if err == nil { return uint64(value), err } return value, err }) } // MakeUint64Token creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an uint64-representation // of the read Rune. func MakeUint64Token(toktype interface{}, handler Handler) Handler { return MakeUint64BaseToken(toktype, 10, handler) } // MakeFloat32Token creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an float32-representation // of the read Rune. func MakeFloat32Token(toktype interface{}, handler Handler) Handler { return makeStrconvToken("float32", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseFloat(s, 32) if err == nil { return float32(value), err } return value, err }) } // MakeFloat64Token creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an float64-representation // of the read Rune. 
func MakeFloat64Token(toktype interface{}, handler Handler) Handler { return makeStrconvToken("float64", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseFloat(s, 64) if err == nil { return float64(value), err } return value, err }) } // MakeBooleanToken creates a Handler that will add a Token to the // Result, for which the Token.Value is set to an bool-representation // of the read Rune. func MakeBooleanToken(toktype interface{}, handler Handler) Handler { return makeStrconvToken("boolean", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseBool(s) if err == nil { return bool(value), err } return value, err }) } func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler { return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { value, err := convert(tokenAPI.Output.String()) if err != nil { // TODO meh, panic feels so bad here. Maybe just turn this case into "no match"? panic(fmt.Sprintf("%s token invalid (%s)", name, err)) } return value }) } // MakeTokenByValue creates a Handler that will add a static Token value // to the Result. func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) Handler { return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { return value }) } // MakeTokenByCallback creates a Handler that will add a Token to the // Result, for which the Token.Value is to be generated by the provided // makeValue() callback function. The function gets the current API as // its input and must return the token value. 
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler { return func(tokenAPI *API) bool { if handler(tokenAPI) { // When a parsing hierarchy looks like ("date" ("year", "month" "day")), the // tokens must end up in the order "date", "year", "month", "day" and not // "year", "month", "day", "date". Therefore (since the inner tokens have already // been produced at this point) we have to insert this token before any tokens // that were already created by the handler call. token := Token{Type: toktype, Value: makeValue(tokenAPI)} tokenAPI.Output.InsertTokenAtStart(token) return true } return false } } // MakeTokenGroup checks if the provided handler matches the input. If yes, then it will // take the tokens as produced by the handler and group them together in a single token. func MakeTokenGroup(toktype interface{}, handler Handler) Handler { return func(tokenAPI *API) bool { if handler(tokenAPI) { tokens := tokenAPI.Output.Tokens() tokensCopy := make([]Token, len(tokens)) copy(tokensCopy, tokens) tokenAPI.Output.SetTokens(Token{Type: toktype, Value: tokensCopy}) return true } return false } }