# frozen_string_literal: true

require "redis"

module Honeycomb
  module Redis
    # Patches Redis with the option to configure the Honeycomb client.
    #
    # When you load this integration, each Redis call will be wrapped in a span
    # containing information about the command being invoked.
    #
    # This module automatically gets mixed into the Redis class so you can
    # change the underlying {Honeycomb::Client}. By default, we use the global
    # {Honeycomb.client} to send events. A nil client will disable the
    # integration altogether.
    #
    # @example Custom client
    #   Redis.honeycomb_client = Honeycomb::Client.new(...)
    #
    # @example Disabling instrumentation
    #   Redis.honeycomb_client = nil
    module Configuration
      attr_writer :honeycomb_client

      # Returns the client used to emit Redis spans.
      #
      # Uses `defined?` rather than a truthiness check so that an explicitly
      # assigned `nil` (which disables the integration) is returned as-is
      # instead of falling back to the global {Honeycomb.client}.
      #
      # @return [Honeycomb::Client, nil]
      def honeycomb_client
        return @honeycomb_client if defined?(@honeycomb_client)

        Honeycomb.client
      end
    end

    # Patches Redis::Client with Honeycomb instrumentation.
    #
    # Circa versions 3.x and 4.x of their gem, the Redis class is backed by an
    # underlying Redis::Client object. The methods used to send commands to the
    # Redis server - namely Redis::Client#call, Redis::Client#call_loop,
    # Redis::Client#call_pipeline, Redis::Client#call_pipelined,
    # Redis::Client#call_with_timeout, and Redis::Client#call_without_timeout -
    # all eventually wind up calling the Redis::Client#process method to do the
    # "dirty work" of writing commands out to an underlying connection. So this
    # gives us a single point of entry that's ideal for introducing the
    # Honeycomb span.
    #
    # An alternative interface provided since at least version 3.0.0 is
    # Redis::Distributed. Underneath, though, it maintains a collection of
    # Redis objects, and each call is forwarded to one or more members of the
    # collection. So patching Redis::Client still captures spans originating
    # from Redis::Distributed. Typical commands (i.e., ones that aren't
    # "global" like `QUIT` or `FLUSHALL`) forward to just a single node anyway,
    # so there's not much use to wrapping everything up in a span for the
    # Redis::Distributed method call.
    #
    # Another alternative interface provided since v4.0.3 is Redis::Cluster,
    # which you can configure the Redis class to use instead of Redis::Client.
    # Again, though, Redis::Cluster maintains a collection of Redis::Client
    # instances underneath. The tracing needs wind up being pretty much the
    # same as Redis::Distributed, even though the actual architecture is
    # significantly different.
    #
    # An implementation detail of pub/sub commands since v2.0.0 (well below our
    # supported version of the redis gem!) is Redis::SubscribedClient, but that
    # still wraps an underlying Redis::Client or Redis::Cluster instance.
    #
    # @see https://github.com/redis/redis-rb/blob/2e8577ad71d0efc32f31fb034f341e1eb10abc18/lib/redis/client.rb#L77-L180
    #   Relevant Redis::Client methods circa v3.0.0
    # @see https://github.com/redis/redis-rb/blob/a2c562c002bc8f86d1f47818d63db2da1c5c3d3f/lib/redis/client.rb#L124-L239
    #   Relevant Redis::Client methods circa v4.1.3
    # @see https://github.com/redis/redis-rb/commits/master/lib/redis/client.rb
    #   History of Redis::Client
    #
    # @see https://redis.io/topics/partitioning
    #   Partitioning (the basis for Redis::Distributed)
    # @see https://github.com/redis/redis-rb/blob/2e8577ad71d0efc32f31fb034f341e1eb10abc18/lib/redis/distributed.rb
    #   Redis::Distributed circa v3.0.0
    # @see https://github.com/redis/redis-rb/blob/a2c562c002bc8f86d1f47818d63db2da1c5c3d3f/lib/redis/distributed.rb
    #   Redis::Distributed circa v4.1.3
    # @see https://github.com/redis/redis-rb/commits/master/lib/redis/distributed.rb
    #   History of Redis::Distributed
    #
    # @see https://redis.io/topics/cluster-spec
    #   Clustering (the basis for Redis::Cluster)
    # @see https://github.com/redis/redis-rb/commit/7f48c0b02fa89256167bc481a73ce2e0c8cca89a
    #   Initial implementation of Redis::Cluster released in v4.0.3
    # @see https://github.com/redis/redis-rb/blob/a2c562c002bc8f86d1f47818d63db2da1c5c3d3f/lib/redis/cluster.rb
    #   Redis::Cluster circa v4.1.3
    # @see https://github.com/redis/redis-rb/commits/master/lib/redis/cluster.rb
    #   History of Redis::Cluster
    #
    # @see https://redis.io/topics/pubsub
    #   Pub/Sub in Redis
    # @see https://github.com/redis/redis-rb/blob/17d40d80388b536ec53a8f19bb1404e93a61650f/lib/redis/subscribe.rb
    #   Redis::SubscribedClient circa v2.0.0
    # @see https://github.com/redis/redis-rb/blob/2e8577ad71d0efc32f31fb034f341e1eb10abc18/lib/redis/subscribe.rb
    #   Redis::SubscribedClient circa v3.0.0
    # @see https://github.com/redis/redis-rb/blob/a2c562c002bc8f86d1f47818d63db2da1c5c3d3f/lib/redis/subscribe.rb
    #   Redis::SubscribedClient circa v4.1.3
    module Client
      # Wraps the original Redis::Client#process in a span named "redis".
      #
      # When `Redis.honeycomb_client` is nil the call is passed straight
      # through to the original implementation. Otherwise a span is started,
      # decorated with {Fields}, and always sent (via `ensure`) once the
      # original method returns or raises. A StandardError raised anywhere in
      # the instrumented section is recorded on the span as `redis.error` /
      # `redis.error_detail` and then re-raised unchanged.
      #
      # @param commands the commands handed to Redis::Client#process
      # @return whatever the original Redis::Client#process returns
      def process(commands)
        return super if ::Redis.honeycomb_client.nil?

        span = ::Redis.honeycomb_client.start_span(name: "redis")
        begin
          fields = Fields.new(self)
          fields.options = @options
          fields.command = commands
          span.add fields
          # Bare `super` forwards the original arguments and block.
          super
        rescue StandardError => e
          span.add_field "redis.error", e.class.name
          span.add_field "redis.error_detail", e.message
          raise
        ensure
          span.send
        end
      end
    end

    # This structure contains the fields we'll add to each Redis span.
    #
    # The logic is in this class to avoid monkey-patching extraneous APIs into
    # the Redis::Client via {Client}.
    #
    # @private
    class Fields
      # @param client the Redis::Client being instrumented; it is asked for
      #   its `id`, `location` and `command_map`
      def initialize(client)
        @client = client
      end

      # Copies the client options into `redis.<option>` fields.
      #
      # Options rejected by {#ignore?} are skipped. Because `||=` is used, a
      # field that already holds a truthy value (such as the `redis.id` and
      # `redis.location` defaults) is not overwritten.
      #
      # @param options [Hash] the client options, keyed by symbol or string
      def options=(options)
        options.each do |option, value|
          values["redis.#{option}"] ||= value unless ignore?(option)
        end
      end

      # Stores the formatted commands in the `redis.command` field, one
      # command per line.
      #
      # @param commands the command(s) to record; coerced with `Array()`
      def command=(commands)
        commands = Array(commands)
        values["redis.command"] = commands.map { |cmd| format(cmd) }.join("\n")
      end

      # @return [Hash] the fields collected so far
      def to_hash
        values
      end

      private

      # Lazily builds the field hash, seeded with package metadata and the
      # client's id and location.
      def values
        @values ||= {
          "meta.package" => "redis",
          "meta.package_version" => ::Redis::VERSION,
          "redis.id" => @client.id,
          "redis.location" => @client.location,
        }
      end

      # Do we ignore this Redis::Client option?
      #
      # * :url - unsafe because it might contain a password
      # * :password - unsafe
      # * :logger - just some Ruby object, not useful
      # * :_parsed - implementation detail
      def ignore?(option)
        # Redis options may be symbol or string keys.
        #
        # This normalizes `option` using `to_sym` as benchmarking on Ruby MRI
        # v2.6.6 and v2.7.3 has shown that was faster compared to `to_s`.
        # However, `nil` does not support `to_sym`. This uses a guard clause to
        # handle the `nil` case because this is still faster than safe
        # navigation. Also this lib still supports Ruby 2.2.0; which does not
        # include safe navigation.
        return true unless option

        %i[url password logger _parsed].include?(option.to_sym)
      end

      # Renders one command as a single line: the resolved command name in
      # upper case followed by each argument run through {#prettify}. The
      # arguments of an AUTH command are replaced with "[sanitized]" first.
      #
      # Note that this shadows Kernel#format inside this class, which is why
      # {#hex} calls `Kernel.format` explicitly.
      #
      # @param cmd [Array] the command name followed by its arguments
      # @return [String]
      def format(cmd)
        name, *args = cmd.flatten(1)
        name = resolve(name)
        sanitize(args) if name.casecmp("auth").zero?
        [name.upcase, *args.map { |arg| prettify(arg) }].join(" ")
      end

      # Looks the command name up in the client's command map, falling back
      # to the name itself, and returns the result as a String.
      def resolve(name)
        @client.command_map.fetch(name, name).to_s
      end

      # Replaces every argument, in place, with the "[sanitized]" placeholder.
      def sanitize(args)
        args.map! { "[sanitized]" }
      end

      # This aims to replicate the algorithms used by redis-cli.
      #
      # @see https://github.com/antirez/redis/blob/0f026af185e918a9773148f6ceaa1b084662be88/src/sds.c#L940-L1067
      #   The redis-cli parsing algorithm
      #
      # @see https://github.com/antirez/redis/blob/0f026af185e918a9773148f6ceaa1b084662be88/src/sds.c#L878-L907
      #   The redis-cli printing algorithm
      def prettify(arg)
        pretty = arg.to_s.dup
        pretty.encode!("UTF-8", "binary", fallback: ->(c) { hex(c) })
        pretty.gsub!(NEEDS_BACKSLASH, BACKSLASH)
        pretty.gsub!(NEEDS_HEX) { |c| hex(c) }
        # Use =~ rather than Regexp#match?: match? only exists on Ruby 2.4+,
        # and this library still supports Ruby 2.2.
        pretty =~ NEEDS_QUOTES ? "\"#{pretty}\"" : pretty
      end

      # A regular expression matching characters that need to be hex-encoded.
      #
      # This replicates the C isprint() function that redis-cli uses to decide
      # whether to escape a character in hexadecimal notation, "\xhh". Any
      # non-printable character must be represented as a hex escape sequence.
      #
      # Normally, we could match this using a negated POSIX bracket expression:
      #
      #   /[^[:print:]]/
      #
      # You can read that as "not printable".
      #
      # However, in Ruby, these character classes also encompass non-ASCII
      # characters. In contrast, since most platforms have 8-bit `char` types,
      # the C isprint() function generally does not recognize any Unicode code
      # points. This effectively limits the redis-cli interpretation of the
      # printable character range to just printable ASCII characters.
      #
      # Thus, we match using a combination of the previous regexp with a
      # non-POSIX character class that Ruby defines:
      #
      #   /[^[:print:]&&[:ascii:]]/
      #
      # You can read this like
      #
      #   NOT (printable AND ascii)
      #
      # which by DeMorgan's Law is equivalent to
      #
      #   (NOT printable) OR (NOT ascii)
      #
      # That is, if the character is not printable (even in Unicode), we'll
      # escape it; if the character is printable but non-ASCII, we'll also
      # escape it.
      #
      # What's more, Ruby's Regexp#=~ method will blow up if the string does
      # not have a valid encoding (e.g., in UTF-8). We handle this case
      # separately, though, using String#encode! with a :fallback option to
      # hex-encode invalid UTF-8 byte sequences with {#hex}.
      #
      # @see https://ruby-doc.org/core-2.6.5/Regexp.html
      # @see https://github.com/antirez/redis/blob/0f026af185e918a9773148f6ceaa1b084662be88/src/sds.c#L878-L880
      # @see https://github.com/antirez/redis/blob/0f026af185e918a9773148f6ceaa1b084662be88/src/sds.c#L898-L901
      # @see https://www.justinweiss.com/articles/3-steps-to-fix-encoding-problems-in-ruby/
      NEEDS_HEX = /[^[:print:]&&[:ascii:]]/.freeze

      # A regular expression for characters that need to be backslash-escaped.
      #
      # Any match of this regexp will be substituted according to the
      # {BACKSLASH} table. This includes standard C escape sequences (newlines,
      # tabs, etc) as well as a couple special considerations:
      #
      # 1. Because {#prettify} will output double quoted strings if any
      #    escaping is needed, we must match double quotes (") so they'll be
      #    replaced by escaped quotes (\").
      #
      # 2. Backslashes themselves get backslash-escaped, so \ becomes \\.
      #    However, strings with invalid UTF-8 encoding will blow up when we
      #    try to use String#gsub!, so {#prettify} must first use
      #    String#encode! to scrub out invalid characters. It does this by
      #    replacing invalid bytes with hex-encoded escape sequences using
      #    {#hex}. This will insert sequences like \xhh, which contains a
      #    backslash that we *don't* want to escape.
      #
      #    Unfortunately, this regexp can't really distinguish between
      #    backslashes in the original input vs backslashes resulting from the
      #    UTF-8 fallback. We make an effort by using a negative lookahead.
      #    That way, only backslashes that *aren't* followed by x + hex digit +
      #    hex digit will be escaped.
      NEEDS_BACKSLASH = /["\n\r\t\a\b]|\\(?!x\h\h)/.freeze

      # A lookup table for backslash-escaped characters.
      #
      # This is used by {#prettify} to replicate the hard-coded `case`
      # statements in redis-cli. As of this writing, Redis recognizes a handful
      # of standard C escape sequences, like "\n" for newlines.
      #
      # Because {#prettify} will output double quoted strings if any escaping
      # is needed, this table must additionally consider the double-quote to be
      # a backslash-escaped character. For example, instead of generating
      #
      #   '"hello"'
      #
      # we'll generate
      #
      #   "\"hello\""
      #
      # even though redis-cli would technically recognize the single-quoted
      # version.
      #
      # @see https://github.com/antirez/redis/blob/0f026af185e918a9773148f6ceaa1b084662be88/src/sds.c#L888-L896
      #   The redis-cli algorithm for outputting standard escape sequences
      BACKSLASH = {
        "\\" => "\\\\",
        '"' => '\\"',
        "\n" => "\\n",
        "\r" => "\\r",
        "\t" => "\\t",
        "\a" => "\\a",
        "\b" => "\\b",
      }.freeze

      # If the final escaped string needs quotes, it will match this regexp.
      #
      # The overall string returned by {#prettify} should only be quoted if at
      # least one of the following holds:
      #
      # 1. The string contains an escape sequence, broadly demarcated by a
      #    backslash. This includes standard escape sequences like "\n" and
      #    "\t" as well as hex-encoded bytes using the "\x" escape sequence.
      #    Since {#prettify} uses double quotes on its output string, we must
      #    also force quotes if the string itself contains a literal
      #    double quote. This double quote behavior is handled tacitly by the
      #    {NEEDS_BACKSLASH} + {BACKSLASH} replacement.
      #
      # 2. The string contains a single quote. Since redis-cli recognizes
      #    single-quoted strings, we want to wrap the {#prettify} output in
      #    double quotes so that the literal single quote character isn't
      #    mistaken as the delimiter of a new string.
      #
      # 3. The string contains any whitespace characters. If the {#prettify}
      #    output weren't wrapped in quotes, whitespace would act as a
      #    separator between arguments to the Redis command. To group things
      #    together, we need to quote the string.
      NEEDS_QUOTES = /[\\'\s]/.freeze

      # Hex-encodes a (presumably non-printable or non-ASCII) character.
      #
      # Aside from standard backslash escape sequences, redis-cli also
      # recognizes "\xhh" notation, where `hh` is a hexadecimal number.
      #
      # Of note is that redis-cli only recognizes *exactly* two-digit
      # hexadecimal numbers. This is in accordance with IEEE Std 1003.1-2001,
      # Chapter 7, Locale:
      #
      # > A character can be represented as a hexadecimal constant. A
      # > hexadecimal constant shall be specified as the escape character
      # > followed by an 'x' followed by two hexadecimal digits. Each constant
      # > shall represent a byte value. Multi-byte values can be represented by
      # > concatenated constants specified in byte order with the last constant
      # > specifying the least significant byte of the character.
      #
      # Unlike the C `char` type, Ruby's conception of a character can span
      # multiple bytes (and possibly bytes that aren't valid in Ruby's string
      # encoding). So we take care to escape the input properly into the
      # redis-cli compatible version by iterating through each byte and
      # formatting it as a (zero-padded) 2-digit hexadecimal number prefixed by
      # `\x`.
      #
      # @see https://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
      # @see https://github.com/antirez/redis/blob/0f026af185e918a9773148f6ceaa1b084662be88/src/sds.c#L878-L880
      # @see https://github.com/antirez/redis/blob/0f026af185e918a9773148f6ceaa1b084662be88/src/sds.c#L898-L901
      def hex(char)
        # Kernel.format is spelled out because #format is redefined above.
        char.bytes.map { |b| Kernel.format("\\x%02x", b) }.join
      end
    end
  end
end

Redis.extend(Honeycomb::Redis::Configuration)
Redis::Client.prepend(Honeycomb::Redis::Client)