lib/dexter/indexer.rb in pgdexter-0.1.5 vs lib/dexter/indexer.rb in pgdexter-0.1.6

- old
+ new

@@ -5,15 +5,17 @@ def initialize(database_url, options) @database_url = database_url @create = options[:create] @log_level = options[:log_level] @exclude_tables = options[:exclude] + @include_tables = Array(options[:include].split(",")) if options[:include] @log_sql = options[:log_sql] @log_explain = options[:log_explain] @min_time = options[:min_time] || 0 create_extension unless extension_exists? + execute("SET lock_timeout = '5s'") end def process_stat_statements queries = stat_statements.map { |q| Query.new(q) }.sort_by(&:fingerprint).group_by(&:fingerprint).map { |_, v| v.first } log "Processing #{queries.size} new query fingerprints" @@ -28,10 +30,14 @@ tables = possible_tables(queries) queries.each do |query| query.missing_tables = !query.tables.all? { |t| tables.include?(t) } end + if @include_tables + tables = Set.new(tables.to_a & @include_tables) + end + # exclude user specified tables # TODO exclude write-heavy tables @exclude_tables.each do |table| tables.delete(table) end @@ -41,11 +47,11 @@ # create hypothetical indexes and explain queries candidates = tables.any? ? create_hypothetical_indexes(queries.reject(&:missing_tables), tables) : {} # see if new indexes were used and meet bar - new_indexes = determine_indexes(queries, candidates) + new_indexes = determine_indexes(queries, candidates, tables) # display and create new indexes show_and_create_indexes(new_indexes) end @@ -53,10 +59,12 @@ def create_extension execute("SET client_min_messages = warning") begin execute("CREATE EXTENSION IF NOT EXISTS hypopg") + rescue PG::UndefinedFile + abort "Install HypoPG first: https://github.com/ankane/dexter#installation" rescue PG::InsufficientPrivilege abort "Use a superuser to run: CREATE EXTENSION hypopg" end end @@ -113,58 +121,105 @@ end end end def create_hypothetical_indexes(queries, tables) + candidates = {} + # get initial costs for queries calculate_plan(queries) - explainable_queries = queries.select(&:explainable?) + explainable_queries = queries.select { |q| q.explainable? && q.high_cost? } - # get existing indexes - index_set = Set.new - indexes(tables).each do |index| - # TODO make sure btree - index_set << [index["table"], index["columns"]] - end + # filter tables for performance + tables = Set.new(explainable_queries.flat_map(&:tables)) - # create hypothetical indexes - candidates = {} - columns_by_table = columns(tables).group_by { |c| c[:table] } + if tables.any? + # get existing indexes + index_set = Set.new + indexes(tables).each do |index| + # TODO make sure btree + index_set << [index["table"], index["columns"]] + end - # create single column indexes - create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates) + # since every set of multi-column indexes are expensive + # try to parse out columns + possible_columns = Set.new + explainable_queries.each do |query| + find_columns(query.tree).each do |col| + last_col = col["fields"].last + if last_col["String"] + possible_columns << last_col["String"]["str"] + end + end + end - # get next round of costs - calculate_plan(explainable_queries) + # create hypothetical indexes + columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] } - # create multicolumn indexes - create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates) + # create single column indexes + create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates) - # get next round of costs - calculate_plan(explainable_queries) + # get next round of costs + calculate_plan(explainable_queries) + # create multicolumn indexes + create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates) + + # get next round of costs + calculate_plan(explainable_queries) + end + candidates end - def determine_indexes(queries, candidates) + def find_columns(plan) + find_by_key(plan, "ColumnRef") + end + + def find_indexes(plan) + find_by_key(plan, "Index Name") + end + + def find_by_key(plan, key) + indexes = [] + case plan + when Hash + plan.each do |k, v| + if k == key + indexes << v + else + indexes.concat(find_by_key(v, key)) + end + end + when Array + indexes.concat(plan.flat_map { |v| find_by_key(v, key) }) + end + indexes + end + + def determine_indexes(queries, candidates, tables) new_indexes = {} + index_name_to_columns = candidates.invert queries.each do |query| - if query.explainable? + if query.explainable? && query.high_cost? new_cost, new_cost2 = query.costs[1..2] cost_savings = new_cost < query.initial_cost * 0.5 # set high bar for multicolumn indexes cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5 query.new_cost = cost_savings2 ? new_cost2 : new_cost query_indexes = [] - candidates.each do |col_set, index_name| - key = cost_savings2 ? 2 : 1 + key = cost_savings2 ? 2 : 1 + indexes = find_indexes(query.plans[key]).uniq.sort - if query.plans[key].inspect.include?(index_name) + indexes.each do |index_name| + col_set = index_name_to_columns[index_name] + + if col_set index = { table: col_set[0][:table], columns: col_set.map { |c| c[:column] } } query_indexes << index @@ -177,11 +232,15 @@ end end if @log_level == "debug2" log "Processed #{query.fingerprint}" - if query.explainable? + if tables.empty? + log "No candidate tables for indexes" + elsif query.explainable? && !query.high_cost? + log "Low initial cost: #{query.initial_cost}" + elsif query.explainable? log "Cost: #{query.initial_cost} -> #{query.new_cost}" if query_indexes.any? log "Indexes: #{query_indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")}" log "Need 50% cost savings to suggest index" unless cost_savings || cost_savings2 @@ -231,11 +290,15 @@ new_indexes.each do |index| unless index_exists?(index) statement = "CREATE INDEX CONCURRENTLY ON #{quote_ident(index[:table])} (#{index[:columns].map { |c| quote_ident(c) }.join(", ")})" log "Creating index: #{statement}" started_at = Time.now - execute(statement) - log "Index created: #{((Time.now - started_at) * 1000).to_i} ms" + begin + execute(statement) + log "Index created: #{((Time.now - started_at) * 1000).to_i} ms" + rescue PG::LockNotAvailable => e + log "Could not acquire lock: #{index[:table]}" + end end end end end else