# This is a more sophisticated sharding method based on a two-layer database-backed
# blocks map that holds block-shard associations. Record blocks are mapped to tablegroups
# and groups are mapped to shards.
#
# It automatically creates new blocks for new keys and assigns them to existing groups.
# Warning: make sure to create at least one shard and one group before inserting any records.
#
module DbCharmer
  module Sharding
    module Method
      # Shared implementation of the block -> group -> shard lookup.
      #
      # The code below checks whether +self+ is a DbBlockGroupMap or a DbBlockSchemaMap to
      # pick the Group/Shard model classes, and it calls +shard_connection_config(shard, group_id)+,
      # which is not defined in this module and must be provided by the including class.
      module DbBlockGroupMapBase
        #---------------------------------------------------------------------------------------------------------------
        # Sharder name
        attr_accessor :name

        # Mapping db connection (connection_name is exposed here but never assigned in this module)
        attr_accessor :connection, :connection_name

        # Mapping table name
        attr_accessor :map_table

        # Tablegroups table name
        attr_accessor :groups_table

        # Shards table name
        attr_accessor :shards_table

        # Sharding keys block size
        attr_accessor :block_size

        # Builds the sharder from a config hash.
        #
        # Required keys: :name, :map_table, :groups_table, :shards_table.
        # Optional keys: :connection (passed to the connection factory), :block_size
        # (defaults to 10000), :blocks_cache_prefix (defaults to "<name>_block:").
        #
        # Raises ArgumentError when a required key is missing, or when a DbBlockSchemaMap
        # is used with an adapter other than postgresql.
        def initialize(config)
          @name = config[:name] || raise(ArgumentError, "Missing required :name parameter!")
          @connection = DbCharmer::ConnectionFactory.connect(config[:connection], true)

          if self.is_a?(DbCharmer::Sharding::Method::DbBlockSchemaMap) &&
             ::ActiveRecord::Base.configurations[DbCharmer.env]['adapter'] != 'postgresql'
            raise(ArgumentError, 'DbBlockSchemaMap method can only be used with the postgresql adapter')
          end

          @block_size = (config[:block_size] || 10000).to_i

          @map_table = config[:map_table] || raise(ArgumentError, "Missing required :map_table parameter!")
          @groups_table = config[:groups_table] || raise(ArgumentError, "Missing required :groups_table parameter!")
          @shards_table = config[:shards_table] || raise(ArgumentError, "Missing required :shards_table parameter!")

          # Local caches
          @shard_info_cache = {}
          @group_info_cache = {}

          @blocks_cache = Rails.cache
          @blocks_cache_prefix = config[:blocks_cache_prefix] || "#{@name}_block:"
        end

        #---------------------------------------------------------------------------------------------------------------
        # Resolves a sharding key to a shard connection config.
        #
        # Looks up the block covering the key (allocating a new one when none exists), then
        # follows block -> group -> shard and returns whatever shard_connection_config returns.
        # Raises ArgumentError when no block can be found or created, or when the group or
        # shard referenced by the mapping does not exist.
        def shard_for_key(key)
          block = block_for_key(key)

          # Auto-allocate new blocks
          block ||= allocate_new_block_for_key(key)
          raise ArgumentError, "Invalid key value, no shards found for this key and could not create a new block!" unless block

          # Load shard
          group_id = block['group_id'].to_i
          shard_info = shard_info_by_group_id(group_id)

          # Get config
          shard_connection_config(shard_info, group_id)
        end

        #---------------------------------------------------------------------------------------------------------------
        # Returns the mapping row for the block containing +key+, or nil when there is none.
        #
        # With cache = true a cached row is returned when present. With cache = false the
        # cache is not read, but the freshly loaded result (even nil) is still written to
        # it, which replaces any previously cached entry for that block.
        def block_for_key(key, cache = true)
          key_range = [ block_start_for_key(key), block_end_for_key(key) ]
          block_cache_key = "%d-%d" % key_range

          if cache
            cached_block = get_cached_block(block_cache_key)
            return cached_block if cached_block
          end

          # Nothing usable in the cache: load from db
          sql = "SELECT * FROM #{map_table} WHERE start_id = #{key_range.first} AND end_id = #{key_range.last} LIMIT 1"
          block = connection.select_one(sql, 'Find a shard block')

          set_cached_block(block_cache_key, block)

          block
        end

        #---------------------------------------------------------------------------------------------------------------
        # Reads a block from the blocks cache; the configured prefix is prepended to the key.
        def get_cached_block(block_cache_key)
          @blocks_cache.read("#{@blocks_cache_prefix}#{block_cache_key}")
        end

        # Writes a block to the blocks cache; the configured prefix is prepended to the key.
        def set_cached_block(block_cache_key, block)
          @blocks_cache.write("#{@blocks_cache_prefix}#{block_cache_key}", block)
        end

        #---------------------------------------------------------------------------------------------------------------
        # Load group info. Pass cache = false to drop the locally cached record first.
        # Returns nil when no such group exists (nil results are not memoized).
        def group_info_by_id(group_id, cache = true)
          # Cleanup the cache if asked to
          @group_info_cache[group_id] = nil unless cache

          # Either load from cache or from db
          @group_info_cache[group_id] ||= begin
            prepare_shard_models
            group_class.find_by_id(group_id)
          end
        end

        # Load shard info. Pass cache = false to drop the locally cached record first.
        # Returns nil when no such shard exists (nil results are not memoized).
        def shard_info_by_id(shard_id, cache = true)
          # Cleanup the cache if asked to
          @shard_info_cache[shard_id] = nil unless cache

          # Either load from cache or from db
          @shard_info_cache[shard_id] ||= begin
            prepare_shard_models
            shard_class.find_by_id(shard_id)
          end
        end

        # Forgets all locally cached shard records
        def clear_shard_info_cache
          @shard_info_cache = {}
        end

        # Forgets all locally cached group records
        def clear_group_info_cache
          @group_info_cache = {}
        end

        # Load shard info using mapping info for a group.
        # Raises ArgumentError when either the group or its shard cannot be found.
        def shard_info_by_group_id(group_id)
          # Load group
          group_info = group_info_by_id(group_id)
          raise ArgumentError, "Invalid group_id: #{group_id}" unless group_info

          shard_info = shard_info_by_id(group_info.shard_id)
          raise ArgumentError, "Invalid shard_id: #{group_info.shard_id}" unless shard_info

          shard_info
        end

        #---------------------------------------------------------------------------------------------------------------
        # Creates the mapping row for the block containing +key+ on the least loaded group,
        # bumps that group's blocks_count and returns the block as re-read via block_for_key.
        def allocate_new_block_for_key(key)
          # Can't find any groups to use for blocks allocation!
          group = least_loaded_group
          return nil unless group

          # Figure out block limits
          start_id = block_start_for_key(key)
          end_id = block_end_for_key(key)

          # Try to insert a new mapping.
          # NOTE(review): nothing here rescues duplicate key errors, so a concurrent
          # allocation of the same block would propagate the database error - confirm
          # whether that is intended.
          sql = <<-SQL
            INSERT INTO #{map_table} (start_id, end_id, group_id, block_size, created_at, updated_at)
            VALUES (#{start_id}, #{end_id}, #{group.id}, #{block_size}, NOW(), NOW())
          SQL
          connection.execute(sql, "Allocate new block")

          # Increment the blocks counter on the group
          group_class.update_counters(group.id, :blocks_count => +1)

          # Retry block search after creation
          block_for_key(key)
        end

        # Returns the enabled, open group with the lowest blocks_count.
        # Raises when there is no such group.
        def least_loaded_group
          prepare_shard_models

          # Select group
          group = group_class.first(:conditions => { :enabled => true, :open => true }, :order => 'blocks_count ASC')
          raise "Can't find any tablegroups to use for blocks allocation!" unless group

          group
        end

        #---------------------------------------------------------------------------------------------------------------
        # First id of the block containing +key+: key rounded down to a multiple of block_size
        def block_start_for_key(key)
          block_size.to_i * (key.to_i / block_size.to_i)
        end

        # End id of the block containing +key+: the block start plus block_size
        def block_end_for_key(key)
          block_size.to_i + block_start_for_key(key)
        end

        # Group model class matching the concrete sharding method (nil for any other class)
        def group_class
          if self.is_a?(DbCharmer::Sharding::Method::DbBlockGroupMap)
            "DbCharmer::Sharding::Method::DbBlockGroupMap::Group".classify.constantize
          elsif self.is_a?(DbCharmer::Sharding::Method::DbBlockSchemaMap)
            "DbCharmer::Sharding::Method::DbBlockSchemaMap::Group".classify.constantize
          end
        end

        # Shard model class matching the concrete sharding method (nil for any other class)
        def shard_class
          if self.is_a?(DbCharmer::Sharding::Method::DbBlockGroupMap)
            "DbCharmer::Sharding::Method::DbBlockGroupMap::Shard".classify.constantize
          elsif self.is_a?(DbCharmer::Sharding::Method::DbBlockSchemaMap)
            "DbCharmer::Sharding::Method::DbBlockSchemaMap::Shard".classify.constantize
          end
        end

        # Returns one connection config per enabled group
        def shard_connections
          # Find all groups
          prepare_shard_models
          groups = group_class.all(:conditions => { :enabled => true }, :include => :shard)

          # Map them to shards
          groups.map { |group| shard_connection_config(group.shard, group.id) }
        end

        # Prepare models for working with our shards and groups tables
        def prepare_shard_models
          shard_class.switch_connection_to(connection)
          shard_class.set_table_name(shards_table)

          group_class.switch_connection_to(connection)
          group_class.set_table_name(groups_table)
        end

        # This connections settings can be used to drop and create databases.
        # Built from a copy of the mapping connection's @config with the shard's host, port
        # and credentials merged in, :database set to nil and :schema_name set to ''.
        def shard_connection_config_no_dbname(shard)
          # Format connection name
          connection_name = "db_charmer_db_block_group_map_#{name}_s%d_no_db" % shard.id

          connection.instance_variable_get(:@config).clone.merge(
            # Name for the connection factory
            :connection_name => connection_name,
            # Connection params
            :host => shard.db_host,
            :port => shard.db_port,
            :username => shard.db_user,
            :password => shard.db_pass,
            :database => nil,
            :schema_name => ''
          )
        end

        # Creates the shard's database unless pg_database already lists it.
        # ActiveRecord::Base is switched back to its previous connection afterwards,
        # including when the check or the creation raises.
        def create_shard_database(shard)
          with_shard_server_connection(shard) do |conn_config|
            sql = "SELECT datname FROM pg_database WHERE datname='#{shard.db_name}'"
            existing_dbs = ::ActiveRecord::Base.connection.execute(sql)

            unless existing_dbs.first
              ::ActiveRecord::Base.connection.create_database(shard.db_name, conn_config)
            end
          end
        end

        # Drops the shard's database.
        # ActiveRecord::Base is switched back to its previous connection afterwards,
        # including when the drop raises.
        def drop_shard_database(shard)
          with_shard_server_connection(shard) do
            ::ActiveRecord::Base.connection.drop_database(shard.db_name)
          end
        end

        # Drops the database of every shard found in the shards table
        def drop_all_shard_databases
          prepare_shard_models
          shard_class.all.each do |shard|
            drop_shard_database(shard)
          end
        end

        private

        # Runs the block with ActiveRecord::Base switched to the shard's database-less
        # connection (yielding that config) and always restores the previous proxy.
        def with_shard_server_connection(shard)
          conn_config = shard_connection_config_no_dbname(shard)
          old_proxy = ::ActiveRecord::Base.db_charmer_connection_proxy

          begin
            ::ActiveRecord::Base.switch_connection_to(conn_config)
            yield conn_config
          ensure
            # Without this an exception would leave ActiveRecord::Base on the shard server
            ::ActiveRecord::Base.switch_connection_to(old_proxy)
          end
        end
      end
    end
  end
end