lib/pghero/methods/suggested_indexes.rb in pghero-1.7.0 vs lib/pghero/methods/suggested_indexes.rb in pghero-2.0.0

- old
+ new

@@ -1,32 +1,32 @@ module PgHero module Methods module SuggestedIndexes def suggested_indexes_enabled? - defined?(PgQuery) && query_stats_enabled? + defined?(PgQuery) && Gem::Version.new(PgQuery::VERSION) >= Gem::Version.new("0.9.0") && query_stats_enabled? end # TODO clean this mess - def suggested_indexes_by_query(options = {}) + def suggested_indexes_by_query(queries: nil, query_stats: nil, indexes: nil) best_indexes = {} if suggested_indexes_enabled? # get most time-consuming queries - queries = options[:queries] || (options[:query_stats] || query_stats(historical: true, start_at: 24.hours.ago)).map { |qs| qs["query"] } + queries ||= (query_stats || self.query_stats(historical: true, start_at: 24.hours.ago)).map { |qs| qs[:query] } # get best indexes for queries best_indexes = best_index_helper(queries) if best_indexes.any? existing_columns = Hash.new { |hash, key| hash[key] = Hash.new { |hash2, key2| hash2[key2] = [] } } - indexes = self.indexes - indexes.group_by { |g| g["using"] }.each do |group, inds| + indexes ||= self.indexes + indexes.group_by { |g| g[:using] }.each do |group, inds| inds.each do |i| - existing_columns[group][i["table"]] << i["columns"] + existing_columns[group][i[:table]] << i[:columns] end end - indexes_by_table = indexes.group_by { |i| i["table"] } + indexes_by_table = indexes.group_by { |i| i[:table] } best_indexes.each do |_query, best_index| if best_index[:found] index = best_index[:index] best_index[:table_indexes] = indexes_by_table[index[:table]].to_a @@ -36,48 +36,41 @@ best_index[:explanation] = "Covered by index on (#{covering_index.join(", ")})" end end end end + else + raise NotEnabled, "Suggested indexes not enabled" end best_indexes end - def suggested_indexes(options = {}) + def suggested_indexes(suggested_indexes_by_query: nil, **options) indexes = [] - (options[:suggested_indexes_by_query] || suggested_indexes_by_query(options)).select { |_s, i| i[:found] && !i[:covering_index] }.group_by { |_s, i| i[:index] }.each do |index, group| + (suggested_indexes_by_query || self.suggested_indexes_by_query(options)).select { |_s, i| i[:found] && !i[:covering_index] }.group_by { |_s, i| i[:index] }.each do |index, group| details = {} group.map(&:second).each do |g| details = details.except(:index).deep_merge(g) end indexes << index.merge(queries: group.map(&:first), details: details) end indexes.sort_by { |i| [i[:table], i[:columns]] } end - def autoindex(options = {}) + def autoindex(create: false) suggested_indexes.each do |index| p index - if options[:create] + if create connection.execute("CREATE INDEX CONCURRENTLY ON #{quote_table_name(index[:table])} (#{index[:columns].map { |c| quote_table_name(c) }.join(",")})") end end end - def autoindex_all(options = {}) - config["databases"].keys.each do |database| - with(database) do - puts "Autoindexing #{database}..." - autoindex(options) - end - end - end - - def best_index(statement, _options = {}) + def best_index(statement) best_index_helper([statement])[statement] end private @@ -93,12 +86,12 @@ # get stats about columns for relevant tables tables = parts.values.map { |t| t[:table] }.uniq # TODO get schema from query structure, then try search path schema = connection_model.connection_config[:schema] || "public" if tables.any? - row_stats = Hash[table_stats(table: tables, schema: schema).map { |i| [i["table"], i["reltuples"]] }] - col_stats = column_stats(table: tables, schema: schema).group_by { |i| i["table"] } + row_stats = Hash[table_stats(table: tables, schema: schema).map { |i| [i[:table], i[:estimated_rows]] }] + col_stats = column_stats(table: tables, schema: schema).group_by { |i| i[:table] } end # find best index based on query structure and column stats parts.each do |statement, structure| index = {found: false} @@ -115,11 +108,11 @@ sort = structure[:sort] total_rows = row_stats[table].to_i index[:rows] = total_rows - ranks = Hash[col_stats[table].to_a.map { |r| [r["column"], r] }] + ranks = Hash[col_stats[table].to_a.map { |r| [r[:column], r] }] columns = (where + sort).map { |c| c[:column] }.uniq if columns.any? if columns.all? { |c| ranks[c] } first_desc = sort.index { |c| c[:direction] == "desc" } @@ -186,44 +179,38 @@ def best_index_structure(statement) return {error: "Too large"} if statement.to_s.length > 10000 begin - parsed_statement = PgQuery.parse(statement) - v2 = parsed_statement.respond_to?(:tree) - tree = v2 ? parsed_statement.tree : parsed_statement.parsetree + tree = PgQuery.parse(statement).tree rescue PgQuery::ParseError return {error: "Parse error"} end return {error: "Unknown structure"} unless tree.size == 1 tree = tree.first table = parse_table(tree) rescue nil unless table error = case tree.keys.first - when "InsertStmt", "INSERT INTO" + when "InsertStmt" "INSERT statement" - when "VariableSetStmt", "SET" + when "VariableSetStmt" "SET statement" when "SelectStmt" if (tree["SelectStmt"]["fromClause"].first["JoinExpr"] rescue false) "JOIN not supported yet" end - when "SELECT" - if (tree["SELECT"]["fromClause"].first["JOINEXPR"] rescue false) - "JOIN not supported yet" - end end return {error: error || "Unknown structure"} end select = tree.values.first - where = (select["whereClause"] ? parse_where(select["whereClause"], v2) : []) rescue nil + where = (select["whereClause"] ? parse_where(select["whereClause"]) : []) rescue nil return {error: "Unknown structure"} unless where - sort = (select["sortClause"] ? parse_sort(select["sortClause"], v2) : []) rescue [] + sort = (select["sortClause"] ? parse_sort(select["sortClause"]) : []) rescue [] {table: table, where: where, sort: sort} end def index_covers?(indexed_columns, columns) @@ -233,26 +220,26 @@ # TODO better row estimation # http://www.postgresql.org/docs/current/static/row-estimation-examples.html def row_estimates(stats, total_rows, rows_left, op) case op when "null" - rows_left * stats["null_frac"].to_f + rows_left * stats[:null_frac].to_f when "not_null" - rows_left * (1 - stats["null_frac"].to_f) + rows_left * (1 - stats[:null_frac].to_f) else - rows_left *= (1 - stats["null_frac"].to_f) + rows_left *= (1 - stats[:null_frac].to_f) ret = - if stats["n_distinct"].to_f == 0 + if stats[:n_distinct].to_f == 0 0 - elsif stats["n_distinct"].to_f < 0 + elsif stats[:n_distinct].to_f < 0 if total_rows > 0 - (-1 / stats["n_distinct"].to_f) * (rows_left / total_rows.to_f) + (-1 / stats[:n_distinct].to_f) * (rows_left / total_rows.to_f) else 0 end else - rows_left / stats["n_distinct"].to_f + rows_left / stats[:n_distinct].to_f end case op when ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN" (rows_left + ret) / 10.0 # TODO better approximation @@ -270,100 +257,54 @@ tree["SelectStmt"]["fromClause"].first["RangeVar"]["relname"] when "DeleteStmt" tree["DeleteStmt"]["relation"]["RangeVar"]["relname"] when "UpdateStmt" tree["UpdateStmt"]["relation"]["RangeVar"]["relname"] - when "SELECT" - tree["SELECT"]["fromClause"].first["RANGEVAR"]["relname"] - when "DELETE FROM" - tree["DELETE FROM"]["relation"]["RANGEVAR"]["relname"] - when "UPDATE" - tree["UPDATE"]["relation"]["RANGEVAR"]["relname"] end end # TODO capture values - def parse_where(tree, v2 = false) - if v2 - aexpr = tree["A_Expr"] + def parse_where(tree) + aexpr = tree["A_Expr"] - if tree["BoolExpr"] - if tree["BoolExpr"]["boolop"] == 0 - tree["BoolExpr"]["args"].flat_map { |v| parse_where(v, v2) } - else - raise "Not Implemented" - end - elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first["String"]["str"]) - [{column: aexpr["lexpr"]["ColumnRef"]["fields"].last["String"]["str"], op: aexpr["name"].first["String"]["str"]}] - elsif tree["NullTest"] - op = tree["NullTest"]["nulltesttype"] == 1 ? "not_null" : "null" - [{column: tree["NullTest"]["arg"]["ColumnRef"]["fields"].last["String"]["str"], op: op}] + if tree["BoolExpr"] + if tree["BoolExpr"]["boolop"] == 0 + tree["BoolExpr"]["args"].flat_map { |v| parse_where(v) } else raise "Not Implemented" end + elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first["String"]["str"]) + [{column: aexpr["lexpr"]["ColumnRef"]["fields"].last["String"]["str"], op: aexpr["name"].first["String"]["str"]}] + elsif tree["NullTest"] + op = tree["NullTest"]["nulltesttype"] == 1 ? "not_null" : "null" + [{column: tree["NullTest"]["arg"]["ColumnRef"]["fields"].last["String"]["str"], op: op}] else - aexpr = tree["AEXPR"] || tree[nil] - - if tree["BOOLEXPR"] - if tree["BOOLEXPR"]["boolop"] == 0 - tree["BOOLEXPR"]["args"].flat_map { |v| parse_where(v) } - else - raise "Not Implemented" - end - elsif tree["AEXPR AND"] - left = parse_where(tree["AEXPR AND"]["lexpr"]) - right = parse_where(tree["AEXPR AND"]["rexpr"]) - if left && right - left + right - else - raise "Not Implemented" - end - elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first) - [{column: aexpr["lexpr"]["COLUMNREF"]["fields"].last, op: aexpr["name"].first}] - elsif tree["AEXPR IN"] && ["=", "<>"].include?(tree["AEXPR IN"]["name"].first) - [{column: tree["AEXPR IN"]["lexpr"]["COLUMNREF"]["fields"].last, op: tree["AEXPR IN"]["name"].first}] - elsif tree["NULLTEST"] - op = tree["NULLTEST"]["nulltesttype"] == 1 ? "not_null" : "null" - [{column: tree["NULLTEST"]["arg"]["COLUMNREF"]["fields"].last, op: op}] - else - raise "Not Implemented" - end + raise "Not Implemented" end end - def parse_sort(sort_clause, v2) - if v2 - sort_clause.map do |v| - { - column: v["SortBy"]["node"]["ColumnRef"]["fields"].last["String"]["str"], - direction: v["SortBy"]["sortby_dir"] == 2 ? "desc" : "asc" - } - end - else - sort_clause.map do |v| - { - column: v["SORTBY"]["node"]["COLUMNREF"]["fields"].last, - direction: v["SORTBY"]["sortby_dir"] == 2 ? "desc" : "asc" - } - end + def parse_sort(sort_clause) + sort_clause.map do |v| + { + column: v["SortBy"]["node"]["ColumnRef"]["fields"].last["String"]["str"], + direction: v["SortBy"]["sortby_dir"] == 2 ? "desc" : "asc" + } end end - def column_stats(options = {}) - schema = options[:schema] - tables = options[:table] ? Array(options[:table]) : nil + def column_stats(schema: nil, table: nil) select_all <<-SQL SELECT schemaname AS schema, tablename AS table, attname AS column, null_frac, n_distinct FROM pg_stats WHERE - #{tables ? "tablename IN (#{tables.map { |t| quote(t) }.join(", ")})" : "1 = 1"} - AND schemaname = #{quote(schema)} + schemaname = #{quote(schema)} + #{table ? "AND tablename IN (#{Array(table).map { |t| quote(t) }.join(", ")})" : ""} ORDER BY 1, 2, 3 SQL end end