lib/dexter/indexer.rb in pgdexter-0.2.0 vs lib/dexter/indexer.rb in pgdexter-0.2.1
- old
+ new
@@ -50,11 +50,11 @@
# see if new indexes were used and meet bar
new_indexes = determine_indexes(queries, candidates, tables)
# display and create new indexes
- show_and_create_indexes(new_indexes)
+ show_and_create_indexes(new_indexes, queries, tables)
end
private
def create_extension
@@ -131,17 +131,10 @@
# filter tables for performance
tables = Set.new(explainable_queries.flat_map(&:tables))
if tables.any?
- # get existing indexes
- index_set = Set.new
- indexes(tables).each do |index|
- # TODO make sure btree
- index_set << [index["table"], index["columns"]]
- end
-
# since every set of multi-column indexes are expensive
# try to parse out columns
possible_columns = Set.new
explainable_queries.each do |query|
find_columns(query.tree).each do |col|
@@ -154,17 +147,17 @@
# create hypothetical indexes
columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] }
# create single column indexes
- create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates)
+ create_hypothetical_indexes_helper(columns_by_table, 1, candidates)
# get next round of costs
calculate_plan(explainable_queries)
# create multicolumn indexes
- create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates)
+ create_hypothetical_indexes_helper(columns_by_table, 2, candidates)
# get next round of costs
calculate_plan(explainable_queries)
end
@@ -194,92 +187,157 @@
indexes.concat(plan.flat_map { |v| find_by_key(v, key) })
end
indexes
end
+ def hypo_indexes_from_plan(index_name_to_columns, plan, index_set)
+ query_indexes = []
+
+ find_indexes(plan).uniq.sort.each do |index_name|
+ col_set = index_name_to_columns[index_name]
+
+ if col_set
+ index = {
+ table: col_set[0][:table],
+ columns: col_set.map { |c| c[:column] }
+ }
+
+ unless index_set.include?([index[:table], index[:columns]])
+ query_indexes << index
+ end
+ end
+ end
+
+ query_indexes
+ end
+
def determine_indexes(queries, candidates, tables)
new_indexes = {}
index_name_to_columns = candidates.invert
+ # filter out existing indexes
+ # this must happen at end of process
+ # since sometimes hypothetical indexes
+ # can give lower cost than actual indexes
+ index_set = Set.new
+ if tables.any?
+ indexes(tables).each do |index|
+ if index["using"] == "btree"
+ # don't add indexes that are already covered
+ index_set << [index["table"], index["columns"].first(1)]
+ index_set << [index["table"], index["columns"].first(2)]
+ end
+ end
+ end
+
queries.each do |query|
if query.explainable? && query.high_cost?
new_cost, new_cost2 = query.costs[1..2]
cost_savings = new_cost < query.initial_cost * 0.5
+
# set high bar for multicolumn indexes
cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5
- query.new_cost = cost_savings2 ? new_cost2 : new_cost
-
- query_indexes = []
key = cost_savings2 ? 2 : 1
- indexes = find_indexes(query.plans[key]).uniq.sort
+ query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[key], index_set)
- indexes.each do |index_name|
- col_set = index_name_to_columns[index_name]
+ # likely a bad suggestion, so try single column
+ if cost_savings2 && query_indexes.size > 1
+ query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set)
+ cost_savings2 = false
+ end
- if col_set
- index = {
- table: col_set[0][:table],
- columns: col_set.map { |c| c[:column] }
- }
- query_indexes << index
+ # TODO if multiple indexes are found (for either single or multicolumn)
+ # determine the impact of each individually
+ # for now, be conservative and don't suggest if more than one index
+ suggest_index = (cost_savings || cost_savings2) && query_indexes.size == 1
- if cost_savings
- new_indexes[index] ||= index.dup
- (new_indexes[index][:queries] ||= []) << query
- end
+ if suggest_index
+ query_indexes.each do |index|
+ new_indexes[index] ||= index.dup
+ (new_indexes[index][:queries] ||= []) << query
end
end
- end
- if @log_level == "debug2"
- log "Processed #{query.fingerprint}"
- if tables.empty?
- log "No candidate tables for indexes"
- elsif query.explainable? && !query.high_cost?
- log "Low initial cost: #{query.initial_cost}"
- elsif query.explainable?
- log "Cost: #{query.initial_cost} -> #{query.new_cost}"
-
- if query_indexes.any?
- log "Indexes: #{query_indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")}"
- log "Need 50% cost savings to suggest index" unless cost_savings || cost_savings2
+ query.indexes = query_indexes
+ query.suggest_index = suggest_index
+ query.new_cost =
+ if suggest_index
+ cost_savings2 ? new_cost2 : new_cost
else
- log "Indexes: None"
+ query.initial_cost
end
- elsif query.fingerprint == "unknown"
- log "Could not parse query"
- elsif query.tables.empty?
- log "No tables"
- elsif query.missing_tables
- log "Tables not present in current database"
- else
- log "Could not run explain"
+
+ # TODO optimize
+ if @log_level.start_with?("debug")
+ query.pass1_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set)
+ query.pass2_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[2], index_set)
end
+ end
+ end
- puts
- puts query.statement
- puts
+ # filter out covered indexes
+ covered = Set.new
+ new_indexes.values.each do |index|
+ if index[:columns].size > 1
+ covered << [index[:table], index[:columns].first(1)]
end
end
- new_indexes.values.sort_by(&:to_a)
+ new_indexes.values.reject { |i| covered.include?([i[:table], i[:columns]]) }.sort_by(&:to_a)
end
- def show_and_create_indexes(new_indexes)
+ def log_indexes(indexes)
+ if indexes.any?
+ indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")
+ else
+ "None"
+ end
+ end
+
+ def show_and_create_indexes(new_indexes, queries, tables)
if new_indexes.any?
new_indexes.each do |index|
log "Index found: #{index[:table]} (#{index[:columns].join(", ")})"
+ end
- if @log_level.start_with?("debug")
- index[:queries].sort_by(&:fingerprint).each do |query|
- log "Query #{query.fingerprint} (Cost: #{query.initial_cost} -> #{query.new_cost})"
- puts
- puts query.statement
- puts
+ if @log_level.start_with?("debug")
+ index_queries = new_indexes.flat_map { |i| i[:queries].sort_by(&:fingerprint) }
+ if @log_level == "debug2"
+ fingerprints = Set.new(index_queries.map(&:fingerprint))
+ index_queries.concat(queries.reject { |q| fingerprints.include?(q.fingerprint) }.sort_by(&:fingerprint))
+ end
+ index_queries.each do |query|
+ log "-" * 80
+ log "Query #{query.fingerprint}"
+ log "Total time: #{(query.total_time / 60000.0).round(1)} min, avg time: #{(query.total_time / query.calls.to_f).round} ms, calls: #{query.calls}" if query.total_time
+ if tables.empty?
+ log "No candidate tables for indexes"
+ elsif query.explainable? && !query.high_cost?
+ log "Low initial cost: #{query.initial_cost}"
+ elsif query.explainable?
+ query_indexes = query.indexes || []
+ log "Start: #{query.costs[0]}"
+ log "Pass1: #{query.costs[1]} : #{log_indexes(query.pass1_indexes || [])}"
+ log "Pass2: #{query.costs[2]} : #{log_indexes(query.pass2_indexes || [])}"
+ log "Final: #{query.new_cost} : #{log_indexes(query_indexes)}"
+ if query_indexes.any? && !query.suggest_index
+ log "Need 50% cost savings to suggest index"
+ end
+ elsif query.fingerprint == "unknown"
+ log "Could not parse query"
+ elsif query.tables.empty?
+ log "No tables"
+ elsif query.missing_tables
+ log "Tables not present in current database"
+ else
+ log "Could not run explain"
end
+ log
+ log query.statement
+ log
end
end
if @create
# 1. create lock
@@ -345,16 +403,14 @@
# strip semi-colons as another measure of defense
JSON.parse(execute("EXPLAIN (FORMAT JSON) #{safe_statement(query)}").first["QUERY PLAN"]).first["Plan"]
end
# TODO for multicolumn indexes, use ordering
- def create_hypothetical_indexes_helper(columns_by_table, n, index_set, candidates)
+ def create_hypothetical_indexes_helper(columns_by_table, n, candidates)
columns_by_table.each do |table, cols|
# no reason to use btree index for json columns
cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set|
- if !index_set.include?([table, col_set.map { |col| col[:column] }])
- candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
- end
+ candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
end
end
end
def database_tables