lib/dexter/indexer.rb in pgdexter-0.3.2 vs lib/dexter/indexer.rb in pgdexter-0.3.3

- old
+ new

@@ -10,10 +10,11 @@ @log_sql = options[:log_sql] @log_explain = options[:log_explain] @min_time = options[:min_time] || 0 @min_calls = options[:min_calls] || 0 @analyze = options[:analyze] + @min_cost_savings_pct = options[:min_cost_savings_pct].to_i @options = options create_extension unless extension_exists? execute("SET lock_timeout = '5s'") end @@ -26,24 +27,41 @@ def process_queries(queries) # reset hypothetical indexes reset_hypothetical_indexes - tables = Set.new(database_tables) + tables = Set.new(database_tables + materialized_views) # map tables without schema to schema no_schema_tables = {} search_path_index = Hash[search_path.map.with_index.to_a] tables.group_by { |t| t.split(".")[-1] }.each do |group, t2| no_schema_tables[group] = t2.sort_by { |t| [search_path_index[t.split(".")[0]] || 1000000, t] }[0] end + # add tables from views + view_tables = database_view_tables + view_tables.each do |v, vt| + view_tables[v] = vt.map { |t| no_schema_tables[t] || t } + end + + # fully resolve tables + # make sure no views in result + view_tables.each do |v, vt| + view_tables[v] = vt.flat_map { |t| view_tables[t] || [t] }.uniq + end + # filter queries from other databases and system tables queries.each do |query| # add schema to table if needed query.tables = query.tables.map { |t| no_schema_tables[t] || t } + # substitute view tables + new_tables = query.tables.flat_map { |t| view_tables[t] || [t] }.uniq + query.tables_from_views = new_tables - query.tables + query.tables = new_tables + # check for missing tables query.missing_tables = !query.tables.all? { |t| tables.include?(t) } end # set tables @@ -164,10 +182,11 @@ calculate_plan(queries) explainable_queries = queries.select { |q| q.explainable? && q.high_cost? } # filter tables for performance tables = Set.new(explainable_queries.flat_map(&:tables)) + tables_from_views = Set.new(explainable_queries.flat_map(&:tables_from_views)) if tables.any? # since every set of multi-column indexes are expensive # try to parse out columns possible_columns = Set.new @@ -180,11 +199,12 @@ end end end # create hypothetical indexes - columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] } + # use all columns in tables from views + columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) || tables_from_views.include?(c[:table]) }.group_by { |c| c[:table] } # create single column indexes create_hypothetical_indexes_helper(columns_by_table, 1, candidates) # get next round of costs @@ -263,33 +283,80 @@ index_set << [index["table"], index["columns"].first(2)] end end end + savings_ratio = (1 - @min_cost_savings_pct / 100.0) + queries.each do |query| if query.explainable? && query.high_cost? new_cost, new_cost2 = query.costs[1..2] - cost_savings = new_cost < query.initial_cost * 0.5 + cost_savings = new_cost < query.initial_cost * savings_ratio # set high bar for multicolumn indexes - cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5 + cost_savings2 = new_cost > 100 && new_cost2 < new_cost * savings_ratio key = cost_savings2 ? 2 : 1 query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[key], index_set) # likely a bad suggestion, so try single column if cost_savings2 && query_indexes.size > 1 query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set) cost_savings2 = false end - # TODO if multiple indexes are found (for either single or multicolumn) + suggest_index = cost_savings || cost_savings2 + + cost_savings3 = false + new_cost3 = nil + + # if multiple indexes are found (for either single or multicolumn) # determine the impact of each individually - # for now, be conservative and don't suggest if more than one index - suggest_index = (cost_savings || cost_savings2) && query_indexes.size == 1 + # there may be a better single index that we're not considering + # that didn't get picked up by pass1 or pass2 + # TODO clean this up + # TODO suggest more than one index from this if savings are there + if suggest_index && query_indexes.size > 1 + winning_index = nil + winning_cost = nil + winning_plan = nil + query_indexes.each do |query_index| + reset_hypothetical_indexes + create_hypothetical_index(query_index[:table], query_index[:columns].map { |v| {column: v} }) + plan3 = plan(query.statement) + cost3 = plan3["Total Cost"] + + if !winning_cost || cost3 < winning_cost + winning_cost = cost3 + winning_index = query_index + winning_plan = plan3 + end + end + + query.plans << winning_plan + + # duplicated from above + # TODO DRY + use_winning = + if cost_savings2 + new_cost > 100 && winning_cost < new_cost * savings_ratio + else + winning_cost < query.initial_cost * savings_ratio + end + + if use_winning + query_indexes = [winning_index] + cost_savings3 = true + new_cost3 = winning_cost + query.pass3_indexes = query_indexes + else + suggest_index = false + end + end + if suggest_index query_indexes.each do |index| new_indexes[index] ||= index.dup (new_indexes[index][:queries] ||= []) << query end @@ -297,11 +364,11 @@ query.indexes = query_indexes query.suggest_index = suggest_index query.new_cost = if suggest_index - cost_savings2 ? new_cost2 : new_cost + cost_savings3 ? new_cost3 : (cost_savings2 ? new_cost2 : new_cost) else query.initial_cost end # TODO optimize @@ -366,13 +433,16 @@ elsif query.explainable? query_indexes = query.indexes || [] log "Start: #{query.costs[0]}" log "Pass1: #{query.costs[1]} : #{log_indexes(query.pass1_indexes || [])}" log "Pass2: #{query.costs[2]} : #{log_indexes(query.pass2_indexes || [])}" + if query.costs[3] + log "Pass3: #{query.costs[3]} : #{log_indexes(query.pass3_indexes || [])}" + end log "Final: #{query.new_cost} : #{log_indexes(query.suggest_index ? query_indexes : [])}" if query_indexes.size == 1 && !query.suggest_index - log "Need 50% cost savings to suggest index" + log "Need #{@min_cost_savings_pct}% cost savings to suggest index" end else log "Could not run explain" end log @@ -447,15 +517,19 @@ # TODO for multicolumn indexes, use ordering def create_hypothetical_indexes_helper(columns_by_table, n, candidates) columns_by_table.each do |table, cols| # no reason to use btree index for json columns cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set| - candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"] + candidates[col_set] = create_hypothetical_index(table, col_set) end end end + def create_hypothetical_index(table, col_set) + execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"] + end + def database_tables result = execute <<-SQL SELECT table_schema || '.' || table_name AS table_name FROM @@ -464,10 +538,47 @@ table_catalog = current_database() SQL result.map { |r| r["table_name"] } end + def materialized_views + if server_version_num >= 90300 + result = execute <<-SQL + SELECT + schemaname || '.' || matviewname AS table_name + FROM + pg_matviews + SQL + result.map { |r| r["table_name"] } + else + [] + end + end + + def server_version_num + execute("SHOW server_version_num").first["server_version_num"].to_i + end + + def database_view_tables + result = execute <<-SQL + SELECT + schemaname || '.' || viewname AS table_name, + definition + FROM + pg_views + WHERE + schemaname NOT IN ('information_schema', 'pg_catalog') + SQL + + view_tables = {} + result.each do |row| + view_tables[row["table_name"]] = PgQuery.parse(row["definition"]).tables + end + + view_tables + end + def stat_statements result = execute <<-SQL SELECT DISTINCT query FROM @@ -513,16 +624,18 @@ end def columns(tables) columns = execute <<-SQL SELECT - table_schema || '.' || table_name AS table_name, - column_name, - data_type - FROM - information_schema.columns - WHERE - table_schema || '.' || table_name IN (#{tables.map { |t| quote(t) }.join(", ")}) + s.nspname || '.' || t.relname AS table_name, + a.attname AS column_name, + pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type + FROM pg_attribute a + JOIN pg_class t on a.attrelid = t.oid + JOIN pg_namespace s on t.relnamespace = s.oid + WHERE a.attnum > 0 + AND NOT a.attisdropped + AND s.nspname || '.' || t.relname IN (#{tables.map { |t| quote(t) }.join(", ")}) ORDER BY 1, 2 SQL columns.map { |v| {table: v["table_name"], column: v["column_name"], type: v["data_type"]} }