lib/dexter/indexer.rb in pgdexter-0.5.2 vs lib/dexter/indexer.rb in pgdexter-0.5.3

- old (pgdexter 0.5.2)
+ new (pgdexter 0.5.3)

@@ -94,14 +94,20 @@
       # analyze tables if needed
       analyze_tables(tables) if tables.any? && (@analyze || @log_level == "debug2")
 
       # create hypothetical indexes and explain queries
-      candidates = tables.any? ? create_hypothetical_indexes(queries.select(&:candidate_tables)) : {}
+      if tables.any?
+        # process in batches to prevent "hypopg: not more oid available" error
+        # https://hypopg.readthedocs.io/en/rel1_stable/usage.html#configuration
+        queries.select(&:candidate_tables).each_slice(500) do |batch|
+          create_hypothetical_indexes(batch)
+        end
+      end
 
       # see if new indexes were used and meet bar
-      new_indexes = determine_indexes(queries, candidates, tables)
+      new_indexes = determine_indexes(queries, tables)
 
       # display and create new indexes
       show_and_create_indexes(new_indexes, queries)
     end
@@ -226,11 +232,13 @@
         # get next round of costs
         calculate_plan(explainable_queries)
       end
 
-      candidates
+      queries.each do |query|
+        query.candidates = candidates
+      end
     end
 
     def find_columns(plan)
       plan = JSON.parse(plan.to_json, max_nesting: 1000)
       find_by_key(plan, "ColumnRef")
@@ -280,13 +288,12 @@
       end
 
       query_indexes
     end
 
-    def determine_indexes(queries, candidates, tables)
+    def determine_indexes(queries, tables)
       new_indexes = {}
-      index_name_to_columns = candidates.invert
 
       # filter out existing indexes
       # this must happen at end of process
       # since sometimes hypothetical indexes
       # can give lower cost than actual indexes
@@ -311,15 +318,15 @@
           # set high bar for multicolumn indexes
           cost_savings2 = new_cost > 100 && new_cost2 < new_cost * savings_ratio
 
           key = cost_savings2 ? 2 : 1
-          query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[key], index_set)
+          query_indexes = hypo_indexes_from_plan(query.candidates, query.plans[key], index_set)
 
           # likely a bad suggestion, so try single column
           if cost_savings2 && query_indexes.size > 1
-            query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set)
+            query_indexes = hypo_indexes_from_plan(query.candidates, query.plans[1], index_set)
             cost_savings2 = false
           end
 
           suggest_index = cost_savings || cost_savings2
@@ -388,12 +395,12 @@
              query.initial_cost
            end
 
          # TODO optimize
          if @log_level.start_with?("debug")
-           query.pass1_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set)
-           query.pass2_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[2], index_set)
+           query.pass1_indexes = hypo_indexes_from_plan(query.candidates, query.plans[1], index_set)
+           query.pass2_indexes = hypo_indexes_from_plan(query.candidates, query.plans[2], index_set)
          end
        end
      end
 
      # filter out covered indexes
@@ -593,11 +600,12 @@
     # TODO for multicolumn indexes, use ordering
     def create_hypothetical_indexes_helper(columns_by_table, n, candidates)
       columns_by_table.each do |table, cols|
         # no reason to use btree index for json columns
         cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set|
-          candidates[col_set] = create_hypothetical_index(table, col_set)
+          index_name = create_hypothetical_index(table, col_set)
+          candidates[index_name] = col_set
         end
       end
     end
 
     def create_hypothetical_index(table, col_set)
@@ -610,9 +618,10 @@
           table_schema || '.' || table_name AS table_name
         FROM
           information_schema.tables
         WHERE
           table_catalog = current_database()
+          AND table_type IN ('BASE TABLE', 'VIEW')
       SQL
       result.map { |r| r["table_name"] }
     end
 
     def materialized_views
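
Note on the headline change: hypopg assigns each hypothetical index an OID from a small reserved range (see the configuration docs linked in the diff) and releases them only on hypopg_reset() or at session end, so building candidates for an unbounded number of queries in one pass can fail with "hypopg: not more oid available"; slicing the work into groups of 500 bounds how many hypothetical indexes exist at a time. Below is a minimal sketch of that pattern against a bare PG::Connection, for illustration only — `queries`, the `candidate_columns` accessor, and the connection details are assumptions, not dexter's API.

require "pg"

# Hypothetical setup: `queries` is an enumerable whose items expose
# `candidate_columns` as [table, column] pairs (not dexter's API).
conn = PG.connect(dbname: "mydb")

queries.each_slice(500) do |batch|
  # discard the previous batch's hypothetical indexes,
  # returning their OIDs to hypopg's reserved pool
  conn.exec("SELECT hypopg_reset()")

  batch.each do |query|
    query.candidate_columns.each do |table, column|
      # hypopg_create_index registers a session-local hypothetical
      # index and returns its OID and generated name
      conn.exec_params("SELECT * FROM hypopg_create_index($1)",
        ["CREATE INDEX ON #{table} (#{column})"])
    end
  end

  # EXPLAIN each query in the batch here, while its
  # hypothetical indexes are still registered
end

The remaining Ruby hunks are bookkeeping for that change: because each_slice discards the per-batch return value that the old code collected, candidates are now keyed by index name (candidates[index_name] = col_set) and attached to each query as query.candidates, which removes both the candidates parameter of determine_indexes and the candidates.invert step. The final hunk is independent: restricting information_schema.tables to table_type IN ('BASE TABLE', 'VIEW') keeps ordinary tables and views while excluding foreign tables ('FOREIGN') and temporary tables ('LOCAL TEMPORARY').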