lib/dexter/indexer.rb in pgdexter-0.2.0 vs lib/dexter/indexer.rb in pgdexter-0.2.1

- old
+ new

@@ -50,11 +50,11 @@ # see if new indexes were used and meet bar new_indexes = determine_indexes(queries, candidates, tables) # display and create new indexes - show_and_create_indexes(new_indexes) + show_and_create_indexes(new_indexes, queries, tables) end private def create_extension @@ -131,17 +131,10 @@ # filter tables for performance tables = Set.new(explainable_queries.flat_map(&:tables)) if tables.any? - # get existing indexes - index_set = Set.new - indexes(tables).each do |index| - # TODO make sure btree - index_set << [index["table"], index["columns"]] - end - # since every set of multi-column indexes are expensive # try to parse out columns possible_columns = Set.new explainable_queries.each do |query| find_columns(query.tree).each do |col| @@ -154,17 +147,17 @@ # create hypothetical indexes columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] } # create single column indexes - create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates) + create_hypothetical_indexes_helper(columns_by_table, 1, candidates) # get next round of costs calculate_plan(explainable_queries) # create multicolumn indexes - create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates) + create_hypothetical_indexes_helper(columns_by_table, 2, candidates) # get next round of costs calculate_plan(explainable_queries) end @@ -194,92 +187,157 @@ indexes.concat(plan.flat_map { |v| find_by_key(v, key) }) end indexes end + def hypo_indexes_from_plan(index_name_to_columns, plan, index_set) + query_indexes = [] + + find_indexes(plan).uniq.sort.each do |index_name| + col_set = index_name_to_columns[index_name] + + if col_set + index = { + table: col_set[0][:table], + columns: col_set.map { |c| c[:column] } + } + + unless index_set.include?([index[:table], index[:columns]]) + query_indexes << index + end + end + end + + query_indexes + end + def determine_indexes(queries, candidates, tables) new_indexes = {} index_name_to_columns = candidates.invert + # filter out existing indexes + # this must happen at end of process + # since sometimes hypothetical indexes + # can give lower cost than actual indexes + index_set = Set.new + if tables.any? + indexes(tables).each do |index| + if index["using"] == "btree" + # don't add indexes that are already covered + index_set << [index["table"], index["columns"].first(1)] + index_set << [index["table"], index["columns"].first(2)] + end + end + end + queries.each do |query| if query.explainable? && query.high_cost? new_cost, new_cost2 = query.costs[1..2] cost_savings = new_cost < query.initial_cost * 0.5 + # set high bar for multicolumn indexes cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5 - query.new_cost = cost_savings2 ? new_cost2 : new_cost - - query_indexes = [] key = cost_savings2 ? 2 : 1 - indexes = find_indexes(query.plans[key]).uniq.sort + query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[key], index_set) - indexes.each do |index_name| - col_set = index_name_to_columns[index_name] + # likely a bad suggestion, so try single column + if cost_savings2 && query_indexes.size > 1 + query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set) + cost_savings2 = false + end - if col_set - index = { - table: col_set[0][:table], - columns: col_set.map { |c| c[:column] } - } - query_indexes << index + # TODO if multiple indexes are found (for either single or multicolumn) + # determine the impact of each individually + # for now, be conservative and don't suggest if more than one index + suggest_index = (cost_savings || cost_savings2) && query_indexes.size == 1 - if cost_savings - new_indexes[index] ||= index.dup - (new_indexes[index][:queries] ||= []) << query - end + if suggest_index + query_indexes.each do |index| + new_indexes[index] ||= index.dup + (new_indexes[index][:queries] ||= []) << query end end - end - if @log_level == "debug2" - log "Processed #{query.fingerprint}" - if tables.empty? - log "No candidate tables for indexes" - elsif query.explainable? && !query.high_cost? - log "Low initial cost: #{query.initial_cost}" - elsif query.explainable? - log "Cost: #{query.initial_cost} -> #{query.new_cost}" - - if query_indexes.any? - log "Indexes: #{query_indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")}" - log "Need 50% cost savings to suggest index" unless cost_savings || cost_savings2 + query.indexes = query_indexes + query.suggest_index = suggest_index + query.new_cost = + if suggest_index + cost_savings2 ? new_cost2 : new_cost else - log "Indexes: None" + query.initial_cost end - elsif query.fingerprint == "unknown" - log "Could not parse query" - elsif query.tables.empty? - log "No tables" - elsif query.missing_tables - log "Tables not present in current database" - else - log "Could not run explain" + + # TODO optimize + if @log_level.start_with?("debug") + query.pass1_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set) + query.pass2_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[2], index_set) end + end + end - puts - puts query.statement - puts + # filter out covered indexes + covered = Set.new + new_indexes.values.each do |index| + if index[:columns].size > 1 + covered << [index[:table], index[:columns].first(1)] end end - new_indexes.values.sort_by(&:to_a) + new_indexes.values.reject { |i| covered.include?([i[:table], i[:columns]]) }.sort_by(&:to_a) end - def show_and_create_indexes(new_indexes) + def log_indexes(indexes) + if indexes.any? + indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ") + else + "None" + end + end + + def show_and_create_indexes(new_indexes, queries, tables) if new_indexes.any? new_indexes.each do |index| log "Index found: #{index[:table]} (#{index[:columns].join(", ")})" + end - if @log_level.start_with?("debug") - index[:queries].sort_by(&:fingerprint).each do |query| - log "Query #{query.fingerprint} (Cost: #{query.initial_cost} -> #{query.new_cost})" - puts - puts query.statement - puts + if @log_level.start_with?("debug") + index_queries = new_indexes.flat_map { |i| i[:queries].sort_by(&:fingerprint) } + if @log_level == "debug2" + fingerprints = Set.new(index_queries.map(&:fingerprint)) + index_queries.concat(queries.reject { |q| fingerprints.include?(q.fingerprint) }.sort_by(&:fingerprint)) + end + index_queries.each do |query| + log "-" * 80 + log "Query #{query.fingerprint}" + log "Total time: #{(query.total_time / 60000.0).round(1)} min, avg time: #{(query.total_time / query.calls.to_f).round} ms, calls: #{query.calls}" if query.total_time + if tables.empty? + log "No candidate tables for indexes" + elsif query.explainable? && !query.high_cost? + log "Low initial cost: #{query.initial_cost}" + elsif query.explainable? + query_indexes = query.indexes || [] + log "Start: #{query.costs[0]}" + log "Pass1: #{query.costs[1]} : #{log_indexes(query.pass1_indexes || [])}" + log "Pass2: #{query.costs[2]} : #{log_indexes(query.pass2_indexes || [])}" + log "Final: #{query.new_cost} : #{log_indexes(query_indexes)}" + if query_indexes.any? && !query.suggest_index + log "Need 50% cost savings to suggest index" + end + elsif query.fingerprint == "unknown" + log "Could not parse query" + elsif query.tables.empty? + log "No tables" + elsif query.missing_tables + log "Tables not present in current database" + else + log "Could not run explain" end + log + log query.statement + log end end if @create # 1. create lock @@ -345,16 +403,14 @@ # strip semi-colons as another measure of defense JSON.parse(execute("EXPLAIN (FORMAT JSON) #{safe_statement(query)}").first["QUERY PLAN"]).first["Plan"] end # TODO for multicolumn indexes, use ordering - def create_hypothetical_indexes_helper(columns_by_table, n, index_set, candidates) + def create_hypothetical_indexes_helper(columns_by_table, n, candidates) columns_by_table.each do |table, cols| # no reason to use btree index for json columns cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set| - if !index_set.include?([table, col_set.map { |col| col[:column] }]) - candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"] - end + candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"] end end end def database_tables