lib/dexter/indexer.rb in pgdexter-0.3.2 vs lib/dexter/indexer.rb in pgdexter-0.3.3
- old
+ new
@@ -10,10 +10,11 @@
  @log_sql = options[:log_sql]
  @log_explain = options[:log_explain]
  @min_time = options[:min_time] || 0
  @min_calls = options[:min_calls] || 0
  @analyze = options[:analyze]
+   @min_cost_savings_pct = options[:min_cost_savings_pct].to_i
  @options = options
  create_extension unless extension_exists?
  execute("SET lock_timeout = '5s'")
end
@@ -26,24 +27,41 @@
def process_queries(queries)
  # reset hypothetical indexes
  reset_hypothetical_indexes
-   tables = Set.new(database_tables)
+   tables = Set.new(database_tables + materialized_views)
  # map tables without schema to schema
  no_schema_tables = {}
  search_path_index = Hash[search_path.map.with_index.to_a]
  tables.group_by { |t| t.split(".")[-1] }.each do |group, t2|
    no_schema_tables[group] = t2.sort_by { |t| [search_path_index[t.split(".")[0]] || 1000000, t] }[0]
  end
+   # add tables from views
+   view_tables = database_view_tables
+   view_tables.each do |v, vt|
+     view_tables[v] = vt.map { |t| no_schema_tables[t] || t }
+   end
+
+   # fully resolve tables
+   # make sure no views remain in the result
+   view_tables.each do |v, vt|
+     view_tables[v] = vt.flat_map { |t| view_tables[t] || [t] }.uniq
+   end
+
  # filter queries from other databases and system tables
  queries.each do |query|
    # add schema to table if needed
    query.tables = query.tables.map { |t| no_schema_tables[t] || t }
+     # substitute view tables
+     new_tables = query.tables.flat_map { |t| view_tables[t] || [t] }.uniq
+     query.tables_from_views = new_tables - query.tables
+     query.tables = new_tables
+
    # check for missing tables
    query.missing_tables = !query.tables.all? { |t| tables.include?(t) }
  end
  # set tables
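For readers skimming the diff, the two new passes over `view_tables` are easy to miss. A minimal standalone sketch of what they do, using made-up table and view names (none of these are from the gem):

```ruby
# pass 0 output: unqualified names already mapped to schema-qualified ones
no_schema_tables = {"users" => "public.users", "active_users" => "public.active_users"}
view_tables = {
  "public.active_users" => ["users"],          # view over a base table
  "public.recent_actives" => ["active_users"]  # view over another view
}

# pass 1: qualify each referenced name with its schema
view_tables.each do |v, vt|
  view_tables[v] = vt.map { |t| no_schema_tables[t] || t }
end

# pass 2: substitute views with their underlying tables, so only base
# tables remain (entries resolved earlier feed the ones resolved later)
view_tables.each do |v, vt|
  view_tables[v] = vt.flat_map { |t| view_tables[t] || [t] }.uniq
end

p view_tables
# => {"public.active_users"=>["public.users"], "public.recent_actives"=>["public.users"]}
```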
@@ -164,10 +182,11 @@
  calculate_plan(queries)
  explainable_queries = queries.select { |q| q.explainable? && q.high_cost? }
  # filter tables for performance
  tables = Set.new(explainable_queries.flat_map(&:tables))
+   tables_from_views = Set.new(explainable_queries.flat_map(&:tables_from_views))
  if tables.any?
    # since trying every set of multi-column indexes is expensive,
    # try to parse out columns
    possible_columns = Set.new
@@ -180,11 +199,12 @@
        end
      end
    end
    # create hypothetical indexes
-     columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] }
+     # use all columns in tables from views
+     columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) || tables_from_views.include?(c[:table]) }.group_by { |c| c[:table] }
    # create single column indexes
    create_hypothetical_indexes_helper(columns_by_table, 1, candidates)
    # get next round of costs
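The rationale for the new `|| tables_from_views.include?(c[:table])` branch: when a query selects from a view, the statement text only mentions the view's column names, so parsing the query for candidate columns reveals nothing about the underlying base tables. For tables reached through views, dexter therefore falls back to considering every column, at the cost of more hypothetical indexes per round.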
@@ -263,33 +283,80 @@
        index_set << [index["table"], index["columns"].first(2)]
      end
    end
  end
+   savings_ratio = (1 - @min_cost_savings_pct / 100.0)
+
  queries.each do |query|
    if query.explainable? && query.high_cost?
      new_cost, new_cost2 = query.costs[1..2]
-       cost_savings = new_cost < query.initial_cost * 0.5
+       cost_savings = new_cost < query.initial_cost * savings_ratio
      # set high bar for multicolumn indexes
-       cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5
+       cost_savings2 = new_cost > 100 && new_cost2 < new_cost * savings_ratio
      key = cost_savings2 ? 2 : 1
      query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[key], index_set)
      # likely a bad suggestion, so try single column
      if cost_savings2 && query_indexes.size > 1
        query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set)
        cost_savings2 = false
      end
-       # TODO if multiple indexes are found (for either single or multicolumn)
+       suggest_index = cost_savings || cost_savings2
+
+       cost_savings3 = false
+       new_cost3 = nil
+
+       # if multiple indexes are found (for either single or multicolumn),
      # determine the impact of each individually
-       # for now, be conservative and don't suggest if more than one index
-       suggest_index = (cost_savings || cost_savings2) && query_indexes.size == 1
+       # there may be a better single index that we're not considering
+       # that didn't get picked up by pass1 or pass2
+       # TODO clean this up
+       # TODO suggest more than one index from this if the savings are there
+       if suggest_index && query_indexes.size > 1
+         winning_index = nil
+         winning_cost = nil
+         winning_plan = nil
+         query_indexes.each do |query_index|
+           reset_hypothetical_indexes
+           create_hypothetical_index(query_index[:table], query_index[:columns].map { |v| {column: v} })
+           plan3 = plan(query.statement)
+           cost3 = plan3["Total Cost"]
+
+           if !winning_cost || cost3 < winning_cost
+             winning_cost = cost3
+             winning_index = query_index
+             winning_plan = plan3
+           end
+         end
+
+         query.plans << winning_plan
+
+         # duplicated from above
+         # TODO DRY
+         use_winning =
+           if cost_savings2
+             new_cost > 100 && winning_cost < new_cost * savings_ratio
+           else
+             winning_cost < query.initial_cost * savings_ratio
+           end
+
+         if use_winning
+           query_indexes = [winning_index]
+           cost_savings3 = true
+           new_cost3 = winning_cost
+           query.pass3_indexes = query_indexes
+         else
+           suggest_index = false
+         end
+       end
+
      if suggest_index
        query_indexes.each do |index|
          new_indexes[index] ||= index.dup
          (new_indexes[index][:queries] ||= []) << query
        end
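The thresholds above generalize the previously hardcoded 0.5. A quick worked example, assuming the option parser supplies 50 (the old behavior) as the default for `min_cost_savings_pct` — note there is no `|| 50` fallback in `initialize`, and a nil option would give `0.to_i` and a ratio of 1.0:

```ruby
savings_ratio = 1 - 50 / 100.0   # => 0.5
# pass 1/2: suggest only if the hypothetical index at least halves the cost,
# i.e. new_cost < initial_cost * 0.5
# pass 3 applies the same bar to the best single-index plan found above
```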
@@ -297,11 +364,11 @@
      query.indexes = query_indexes
      query.suggest_index = suggest_index
      query.new_cost =
        if suggest_index
-           cost_savings2 ? new_cost2 : new_cost
+           cost_savings3 ? new_cost3 : (cost_savings2 ? new_cost2 : new_cost)
        else
          query.initial_cost
        end
      # TODO optimize
@@ -366,13 +433,16 @@
    elsif query.explainable?
      query_indexes = query.indexes || []
      log "Start: #{query.costs[0]}"
      log "Pass1: #{query.costs[1]} : #{log_indexes(query.pass1_indexes || [])}"
      log "Pass2: #{query.costs[2]} : #{log_indexes(query.pass2_indexes || [])}"
+       if query.costs[3]
+         log "Pass3: #{query.costs[3]} : #{log_indexes(query.pass3_indexes || [])}"
+       end
      log "Final: #{query.new_cost} : #{log_indexes(query.suggest_index ? query_indexes : [])}"
      if query_indexes.size == 1 && !query.suggest_index
-         log "Need 50% cost savings to suggest index"
+         log "Need #{@min_cost_savings_pct}% cost savings to suggest index"
      end
    else
      log "Could not run explain"
    end
    log
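Purely illustrative (the exact index formatting comes from `log_indexes`, which is not shown in this diff), the per-query output now gains a Pass3 line whenever the tie-break pass ran:

```
Start: 10000.0
Pass1: 4000.0 : public.users (city_id)
Pass2: 3500.0 : public.users (city_id, login_at)
Pass3: 3200.0 : public.users (city_id)
Final: 3200.0 : public.users (city_id)
```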
@@ -447,15 +517,19 @@
  # TODO for multicolumn indexes, use ordering
  def create_hypothetical_indexes_helper(columns_by_table, n, candidates)
    columns_by_table.each do |table, cols|
      # no reason to use btree index for json columns
      cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set|
-         candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
+         candidates[col_set] = create_hypothetical_index(table, col_set)
      end
    end
  end
+
+ def create_hypothetical_index(table, col_set)
+   execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
+ end
+
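The extracted helper is a thin wrapper over HypoPG. A hedged usage sketch — the table and column names are made up, and the returned name format is HypoPG's:

```ruby
# hypopg_create_index returns one row per created index: (indexrelid, indexname).
# The hypothetical index exists only in this session and costs no disk I/O.
row = execute(<<-SQL).first
  SELECT * FROM hypopg_create_index('CREATE INDEX ON public.users (city_id)')
SQL
row["indexname"]  # something like "<13543>btree_users_city_id"
# reset_hypothetical_indexes presumably issues SELECT hypopg_reset()
# to drop all hypothetical indexes between pass-3 trials
```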
def database_tables
  result = execute <<-SQL
    SELECT
      table_schema || '.' || table_name AS table_name
    FROM
@@ -464,10 +538,47 @@
      table_catalog = current_database()
  SQL
  result.map { |r| r["table_name"] }
end
+
+ def materialized_views
+   if server_version_num >= 90300
+     result = execute <<-SQL
+       SELECT
+         schemaname || '.' || matviewname AS table_name
+       FROM
+         pg_matviews
+     SQL
+     result.map { |r| r["table_name"] }
+   else
+     []
+   end
+ end
+
+ def server_version_num
+   execute("SHOW server_version_num").first["server_version_num"].to_i
+ end
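For context: 90300 is the server_version_num for PostgreSQL 9.3, the first release to ship materialized views (and the pg_matviews catalog), so the guard simply returns an empty list on older servers.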
+
+ def database_view_tables
+   result = execute <<-SQL
+     SELECT
+       schemaname || '.' || viewname AS table_name,
+       definition
+     FROM
+       pg_views
+     WHERE
+       schemaname NOT IN ('information_schema', 'pg_catalog')
+   SQL
+
+   view_tables = {}
+   result.each do |row|
+     view_tables[row["table_name"]] = PgQuery.parse(row["definition"]).tables
+   end
+
+   view_tables
+ end
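PgQuery here is the pg_query gem, which parses SQL with Postgres's own parser. A small sketch of what each hash value ends up holding (the view definition is made up):

```ruby
require "pg_query"

definition = "SELECT id, name FROM users WHERE active"
PgQuery.parse(definition).tables  # => ["users"]
```

Note the names come back exactly as written, usually schema-unqualified, which is why process_queries runs them through no_schema_tables before flattening views into base tables.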
+
def stat_statements
  result = execute <<-SQL
    SELECT
      DISTINCT query
    FROM
@@ -513,16 +624,18 @@
end

def columns(tables)
  columns = execute <<-SQL
    SELECT
-       table_schema || '.' || table_name AS table_name,
-       column_name,
-       data_type
-     FROM
-       information_schema.columns
-     WHERE
-       table_schema || '.' || table_name IN (#{tables.map { |t| quote(t) }.join(", ")})
+       s.nspname || '.' || t.relname AS table_name,
+       a.attname AS column_name,
+       pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type
+     FROM pg_attribute a
+     JOIN pg_class t on a.attrelid = t.oid
+     JOIN pg_namespace s on t.relnamespace = s.oid
+     WHERE a.attnum > 0
+       AND NOT a.attisdropped
+       AND s.nspname || '.' || t.relname IN (#{tables.map { |t| quote(t) }.join(", ")})
    ORDER BY
      1, 2
  SQL
  columns.map { |v| {table: v["table_name"], column: v["column_name"], type: v["data_type"]} }
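The switch from information_schema.columns to the pg_catalog tables is what makes the materialized-view support work end to end: information_schema only covers ordinary tables and views, while pg_class/pg_attribute also expose matview columns. The attnum > 0 and NOT attisdropped filters exclude system columns and dropped columns, respectively.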