lib/pghero/methods/suggested_indexes.rb in pghero-1.7.0 vs lib/pghero/methods/suggested_indexes.rb in pghero-2.0.0
- old
+ new
@@ -1,32 +1,32 @@
module PgHero
module Methods
module SuggestedIndexes
def suggested_indexes_enabled?
- defined?(PgQuery) && query_stats_enabled?
+ defined?(PgQuery) && Gem::Version.new(PgQuery::VERSION) >= Gem::Version.new("0.9.0") && query_stats_enabled?
end
# TODO clean this mess
- def suggested_indexes_by_query(options = {})
+ def suggested_indexes_by_query(queries: nil, query_stats: nil, indexes: nil)
best_indexes = {}
if suggested_indexes_enabled?
# get most time-consuming queries
- queries = options[:queries] || (options[:query_stats] || query_stats(historical: true, start_at: 24.hours.ago)).map { |qs| qs["query"] }
+ queries ||= (query_stats || self.query_stats(historical: true, start_at: 24.hours.ago)).map { |qs| qs[:query] }
# get best indexes for queries
best_indexes = best_index_helper(queries)
if best_indexes.any?
existing_columns = Hash.new { |hash, key| hash[key] = Hash.new { |hash2, key2| hash2[key2] = [] } }
- indexes = self.indexes
- indexes.group_by { |g| g["using"] }.each do |group, inds|
+ indexes ||= self.indexes
+ indexes.group_by { |g| g[:using] }.each do |group, inds|
inds.each do |i|
- existing_columns[group][i["table"]] << i["columns"]
+ existing_columns[group][i[:table]] << i[:columns]
end
end
- indexes_by_table = indexes.group_by { |i| i["table"] }
+ indexes_by_table = indexes.group_by { |i| i[:table] }
best_indexes.each do |_query, best_index|
if best_index[:found]
index = best_index[:index]
best_index[:table_indexes] = indexes_by_table[index[:table]].to_a
@@ -36,48 +36,41 @@
best_index[:explanation] = "Covered by index on (#{covering_index.join(", ")})"
end
end
end
end
+ else
+ raise NotEnabled, "Suggested indexes not enabled"
end
best_indexes
end
- def suggested_indexes(options = {})
+ def suggested_indexes(suggested_indexes_by_query: nil, **options)
indexes = []
- (options[:suggested_indexes_by_query] || suggested_indexes_by_query(options)).select { |_s, i| i[:found] && !i[:covering_index] }.group_by { |_s, i| i[:index] }.each do |index, group|
+ (suggested_indexes_by_query || self.suggested_indexes_by_query(options)).select { |_s, i| i[:found] && !i[:covering_index] }.group_by { |_s, i| i[:index] }.each do |index, group|
details = {}
group.map(&:second).each do |g|
details = details.except(:index).deep_merge(g)
end
indexes << index.merge(queries: group.map(&:first), details: details)
end
indexes.sort_by { |i| [i[:table], i[:columns]] }
end
- def autoindex(options = {})
+ def autoindex(create: false)
suggested_indexes.each do |index|
p index
- if options[:create]
+ if create
connection.execute("CREATE INDEX CONCURRENTLY ON #{quote_table_name(index[:table])} (#{index[:columns].map { |c| quote_table_name(c) }.join(",")})")
end
end
end
- def autoindex_all(options = {})
- config["databases"].keys.each do |database|
- with(database) do
- puts "Autoindexing #{database}..."
- autoindex(options)
- end
- end
- end
-
- def best_index(statement, _options = {})
+ def best_index(statement)
best_index_helper([statement])[statement]
end
private
@@ -93,12 +86,12 @@
# get stats about columns for relevant tables
tables = parts.values.map { |t| t[:table] }.uniq
# TODO get schema from query structure, then try search path
schema = connection_model.connection_config[:schema] || "public"
if tables.any?
- row_stats = Hash[table_stats(table: tables, schema: schema).map { |i| [i["table"], i["reltuples"]] }]
- col_stats = column_stats(table: tables, schema: schema).group_by { |i| i["table"] }
+ row_stats = Hash[table_stats(table: tables, schema: schema).map { |i| [i[:table], i[:estimated_rows]] }]
+ col_stats = column_stats(table: tables, schema: schema).group_by { |i| i[:table] }
end
# find best index based on query structure and column stats
parts.each do |statement, structure|
index = {found: false}
@@ -115,11 +108,11 @@
sort = structure[:sort]
total_rows = row_stats[table].to_i
index[:rows] = total_rows
- ranks = Hash[col_stats[table].to_a.map { |r| [r["column"], r] }]
+ ranks = Hash[col_stats[table].to_a.map { |r| [r[:column], r] }]
columns = (where + sort).map { |c| c[:column] }.uniq
if columns.any?
if columns.all? { |c| ranks[c] }
first_desc = sort.index { |c| c[:direction] == "desc" }
@@ -186,44 +179,38 @@
def best_index_structure(statement)
return {error: "Too large"} if statement.to_s.length > 10000
begin
- parsed_statement = PgQuery.parse(statement)
- v2 = parsed_statement.respond_to?(:tree)
- tree = v2 ? parsed_statement.tree : parsed_statement.parsetree
+ tree = PgQuery.parse(statement).tree
rescue PgQuery::ParseError
return {error: "Parse error"}
end
return {error: "Unknown structure"} unless tree.size == 1
tree = tree.first
table = parse_table(tree) rescue nil
unless table
error =
case tree.keys.first
- when "InsertStmt", "INSERT INTO"
+ when "InsertStmt"
"INSERT statement"
- when "VariableSetStmt", "SET"
+ when "VariableSetStmt"
"SET statement"
when "SelectStmt"
if (tree["SelectStmt"]["fromClause"].first["JoinExpr"] rescue false)
"JOIN not supported yet"
end
- when "SELECT"
- if (tree["SELECT"]["fromClause"].first["JOINEXPR"] rescue false)
- "JOIN not supported yet"
- end
end
return {error: error || "Unknown structure"}
end
select = tree.values.first
- where = (select["whereClause"] ? parse_where(select["whereClause"], v2) : []) rescue nil
+ where = (select["whereClause"] ? parse_where(select["whereClause"]) : []) rescue nil
return {error: "Unknown structure"} unless where
- sort = (select["sortClause"] ? parse_sort(select["sortClause"], v2) : []) rescue []
+ sort = (select["sortClause"] ? parse_sort(select["sortClause"]) : []) rescue []
{table: table, where: where, sort: sort}
end
def index_covers?(indexed_columns, columns)
@@ -233,26 +220,26 @@
# TODO better row estimation
# http://www.postgresql.org/docs/current/static/row-estimation-examples.html
def row_estimates(stats, total_rows, rows_left, op)
case op
when "null"
- rows_left * stats["null_frac"].to_f
+ rows_left * stats[:null_frac].to_f
when "not_null"
- rows_left * (1 - stats["null_frac"].to_f)
+ rows_left * (1 - stats[:null_frac].to_f)
else
- rows_left *= (1 - stats["null_frac"].to_f)
+ rows_left *= (1 - stats[:null_frac].to_f)
ret =
- if stats["n_distinct"].to_f == 0
+ if stats[:n_distinct].to_f == 0
0
- elsif stats["n_distinct"].to_f < 0
+ elsif stats[:n_distinct].to_f < 0
if total_rows > 0
- (-1 / stats["n_distinct"].to_f) * (rows_left / total_rows.to_f)
+ (-1 / stats[:n_distinct].to_f) * (rows_left / total_rows.to_f)
else
0
end
else
- rows_left / stats["n_distinct"].to_f
+ rows_left / stats[:n_distinct].to_f
end
case op
when ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"
(rows_left + ret) / 10.0 # TODO better approximation
@@ -270,100 +257,54 @@
tree["SelectStmt"]["fromClause"].first["RangeVar"]["relname"]
when "DeleteStmt"
tree["DeleteStmt"]["relation"]["RangeVar"]["relname"]
when "UpdateStmt"
tree["UpdateStmt"]["relation"]["RangeVar"]["relname"]
- when "SELECT"
- tree["SELECT"]["fromClause"].first["RANGEVAR"]["relname"]
- when "DELETE FROM"
- tree["DELETE FROM"]["relation"]["RANGEVAR"]["relname"]
- when "UPDATE"
- tree["UPDATE"]["relation"]["RANGEVAR"]["relname"]
end
end
# TODO capture values
- def parse_where(tree, v2 = false)
- if v2
- aexpr = tree["A_Expr"]
+ def parse_where(tree)
+ aexpr = tree["A_Expr"]
- if tree["BoolExpr"]
- if tree["BoolExpr"]["boolop"] == 0
- tree["BoolExpr"]["args"].flat_map { |v| parse_where(v, v2) }
- else
- raise "Not Implemented"
- end
- elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first["String"]["str"])
- [{column: aexpr["lexpr"]["ColumnRef"]["fields"].last["String"]["str"], op: aexpr["name"].first["String"]["str"]}]
- elsif tree["NullTest"]
- op = tree["NullTest"]["nulltesttype"] == 1 ? "not_null" : "null"
- [{column: tree["NullTest"]["arg"]["ColumnRef"]["fields"].last["String"]["str"], op: op}]
+ if tree["BoolExpr"]
+ if tree["BoolExpr"]["boolop"] == 0
+ tree["BoolExpr"]["args"].flat_map { |v| parse_where(v) }
else
raise "Not Implemented"
end
+ elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first["String"]["str"])
+ [{column: aexpr["lexpr"]["ColumnRef"]["fields"].last["String"]["str"], op: aexpr["name"].first["String"]["str"]}]
+ elsif tree["NullTest"]
+ op = tree["NullTest"]["nulltesttype"] == 1 ? "not_null" : "null"
+ [{column: tree["NullTest"]["arg"]["ColumnRef"]["fields"].last["String"]["str"], op: op}]
else
- aexpr = tree["AEXPR"] || tree[nil]
-
- if tree["BOOLEXPR"]
- if tree["BOOLEXPR"]["boolop"] == 0
- tree["BOOLEXPR"]["args"].flat_map { |v| parse_where(v) }
- else
- raise "Not Implemented"
- end
- elsif tree["AEXPR AND"]
- left = parse_where(tree["AEXPR AND"]["lexpr"])
- right = parse_where(tree["AEXPR AND"]["rexpr"])
- if left && right
- left + right
- else
- raise "Not Implemented"
- end
- elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first)
- [{column: aexpr["lexpr"]["COLUMNREF"]["fields"].last, op: aexpr["name"].first}]
- elsif tree["AEXPR IN"] && ["=", "<>"].include?(tree["AEXPR IN"]["name"].first)
- [{column: tree["AEXPR IN"]["lexpr"]["COLUMNREF"]["fields"].last, op: tree["AEXPR IN"]["name"].first}]
- elsif tree["NULLTEST"]
- op = tree["NULLTEST"]["nulltesttype"] == 1 ? "not_null" : "null"
- [{column: tree["NULLTEST"]["arg"]["COLUMNREF"]["fields"].last, op: op}]
- else
- raise "Not Implemented"
- end
+ raise "Not Implemented"
end
end
- def parse_sort(sort_clause, v2)
- if v2
- sort_clause.map do |v|
- {
- column: v["SortBy"]["node"]["ColumnRef"]["fields"].last["String"]["str"],
- direction: v["SortBy"]["sortby_dir"] == 2 ? "desc" : "asc"
- }
- end
- else
- sort_clause.map do |v|
- {
- column: v["SORTBY"]["node"]["COLUMNREF"]["fields"].last,
- direction: v["SORTBY"]["sortby_dir"] == 2 ? "desc" : "asc"
- }
- end
+ def parse_sort(sort_clause)
+ sort_clause.map do |v|
+ {
+ column: v["SortBy"]["node"]["ColumnRef"]["fields"].last["String"]["str"],
+ direction: v["SortBy"]["sortby_dir"] == 2 ? "desc" : "asc"
+ }
end
end
- def column_stats(options = {})
- schema = options[:schema]
- tables = options[:table] ? Array(options[:table]) : nil
+ def column_stats(schema: nil, table: nil)
select_all <<-SQL
SELECT
schemaname AS schema,
tablename AS table,
attname AS column,
null_frac,
n_distinct
FROM
pg_stats
WHERE
- #{tables ? "tablename IN (#{tables.map { |t| quote(t) }.join(", ")})" : "1 = 1"}
- AND schemaname = #{quote(schema)}
+ schemaname = #{quote(schema)}
+ #{table ? "AND tablename IN (#{Array(table).map { |t| quote(t) }.join(", ")})" : ""}
ORDER BY
1, 2, 3
SQL
end
end