lib/dexter/indexer.rb in pgdexter-0.1.5 vs lib/dexter/indexer.rb in pgdexter-0.1.6
- old
+ new
@@ -5,15 +5,17 @@
def initialize(database_url, options)
@database_url = database_url
@create = options[:create]
@log_level = options[:log_level]
@exclude_tables = options[:exclude]
+ @include_tables = Array(options[:include].split(",")) if options[:include]
@log_sql = options[:log_sql]
@log_explain = options[:log_explain]
@min_time = options[:min_time] || 0
create_extension unless extension_exists?
+ execute("SET lock_timeout = '5s'")
end
def process_stat_statements
queries = stat_statements.map { |q| Query.new(q) }.sort_by(&:fingerprint).group_by(&:fingerprint).map { |_, v| v.first }
log "Processing #{queries.size} new query fingerprints"
@@ -28,10 +30,14 @@
tables = possible_tables(queries)
queries.each do |query|
query.missing_tables = !query.tables.all? { |t| tables.include?(t) }
end
+ if @include_tables
+ tables = Set.new(tables.to_a & @include_tables)
+ end
+
# exclude user specified tables
# TODO exclude write-heavy tables
@exclude_tables.each do |table|
tables.delete(table)
end
@@ -41,11 +47,11 @@
# create hypothetical indexes and explain queries
candidates = tables.any? ? create_hypothetical_indexes(queries.reject(&:missing_tables), tables) : {}
# see if new indexes were used and meet bar
- new_indexes = determine_indexes(queries, candidates)
+ new_indexes = determine_indexes(queries, candidates, tables)
# display and create new indexes
show_and_create_indexes(new_indexes)
end
@@ -53,10 +59,12 @@
def create_extension
execute("SET client_min_messages = warning")
begin
execute("CREATE EXTENSION IF NOT EXISTS hypopg")
+ rescue PG::UndefinedFile
+ abort "Install HypoPG first: https://github.com/ankane/dexter#installation"
rescue PG::InsufficientPrivilege
abort "Use a superuser to run: CREATE EXTENSION hypopg"
end
end
@@ -113,58 +121,105 @@
end
end
end
def create_hypothetical_indexes(queries, tables)
+ candidates = {}
+
# get initial costs for queries
calculate_plan(queries)
- explainable_queries = queries.select(&:explainable?)
+ explainable_queries = queries.select { |q| q.explainable? && q.high_cost? }
- # get existing indexes
- index_set = Set.new
- indexes(tables).each do |index|
- # TODO make sure btree
- index_set << [index["table"], index["columns"]]
- end
+ # filter tables for performance
+ tables = Set.new(explainable_queries.flat_map(&:tables))
- # create hypothetical indexes
- candidates = {}
- columns_by_table = columns(tables).group_by { |c| c[:table] }
+ if tables.any?
+ # get existing indexes
+ index_set = Set.new
+ indexes(tables).each do |index|
+ # TODO make sure btree
+ index_set << [index["table"], index["columns"]]
+ end
- # create single column indexes
- create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates)
+ # since every set of multi-column indexes are expensive
+ # try to parse out columns
+ possible_columns = Set.new
+ explainable_queries.each do |query|
+ find_columns(query.tree).each do |col|
+ last_col = col["fields"].last
+ if last_col["String"]
+ possible_columns << last_col["String"]["str"]
+ end
+ end
+ end
- # get next round of costs
- calculate_plan(explainable_queries)
+ # create hypothetical indexes
+ columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] }
- # create multicolumn indexes
- create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates)
+ # create single column indexes
+ create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates)
- # get next round of costs
- calculate_plan(explainable_queries)
+ # get next round of costs
+ calculate_plan(explainable_queries)
+ # create multicolumn indexes
+ create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates)
+
+ # get next round of costs
+ calculate_plan(explainable_queries)
+ end
+
candidates
end
- def determine_indexes(queries, candidates)
+ def find_columns(plan)
+ find_by_key(plan, "ColumnRef")
+ end
+
+ def find_indexes(plan)
+ find_by_key(plan, "Index Name")
+ end
+
+ def find_by_key(plan, key)
+ indexes = []
+ case plan
+ when Hash
+ plan.each do |k, v|
+ if k == key
+ indexes << v
+ else
+ indexes.concat(find_by_key(v, key))
+ end
+ end
+ when Array
+ indexes.concat(plan.flat_map { |v| find_by_key(v, key) })
+ end
+ indexes
+ end
+
+ def determine_indexes(queries, candidates, tables)
new_indexes = {}
+ index_name_to_columns = candidates.invert
queries.each do |query|
- if query.explainable?
+ if query.explainable? && query.high_cost?
new_cost, new_cost2 = query.costs[1..2]
cost_savings = new_cost < query.initial_cost * 0.5
# set high bar for multicolumn indexes
cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5
query.new_cost = cost_savings2 ? new_cost2 : new_cost
query_indexes = []
- candidates.each do |col_set, index_name|
- key = cost_savings2 ? 2 : 1
+ key = cost_savings2 ? 2 : 1
+ indexes = find_indexes(query.plans[key]).uniq.sort
- if query.plans[key].inspect.include?(index_name)
+ indexes.each do |index_name|
+ col_set = index_name_to_columns[index_name]
+
+ if col_set
index = {
table: col_set[0][:table],
columns: col_set.map { |c| c[:column] }
}
query_indexes << index
@@ -177,11 +232,15 @@
end
end
if @log_level == "debug2"
log "Processed #{query.fingerprint}"
- if query.explainable?
+ if tables.empty?
+ log "No candidate tables for indexes"
+ elsif query.explainable? && !query.high_cost?
+ log "Low initial cost: #{query.initial_cost}"
+ elsif query.explainable?
log "Cost: #{query.initial_cost} -> #{query.new_cost}"
if query_indexes.any?
log "Indexes: #{query_indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")}"
log "Need 50% cost savings to suggest index" unless cost_savings || cost_savings2
@@ -231,11 +290,15 @@
new_indexes.each do |index|
unless index_exists?(index)
statement = "CREATE INDEX CONCURRENTLY ON #{quote_ident(index[:table])} (#{index[:columns].map { |c| quote_ident(c) }.join(", ")})"
log "Creating index: #{statement}"
started_at = Time.now
- execute(statement)
- log "Index created: #{((Time.now - started_at) * 1000).to_i} ms"
+ begin
+ execute(statement)
+ log "Index created: #{((Time.now - started_at) * 1000).to_i} ms"
+ rescue PG::LockNotAvailable => e
+ log "Could not acquire lock: #{index[:table]}"
+ end
end
end
end
end
else