lib/tree_clusters.rb in tree_clusters-0.7.0 vs lib/tree_clusters.rb in tree_clusters-0.8.0

- old
+ new

# Diff context preceding this method (truncated in this view, kept as found):
#   @@ -87,9 +87,28 @@
#   end Set.new low_ent_cols end

# Like the low_ent_cols method, but also returns the bases found at each
# reported position.
#
# A column is reported when it contains no gap character ("-") and the
# Shannon entropy of its upcased characters is at or below entropy_cutoff.
#
# @param leaves leaf identifiers, passed straight to leaf2attrs.attrs
# @param leaf2attrs [#attrs] object whose attrs(leaves, :aln) returns one
#   aligned sequence per leaf (presumably equal-length arrays of
#   single-character strings -- verify against the caller)
# @param entropy_cutoff [Numeric] inclusive upper bound on column entropy
# @return [Set<Array>] one [position, bases] pair per qualifying column;
#   position is 1-based and bases is the sorted list of distinct upcased
#   characters seen in that column
def low_ent_cols_with_bases leaves, leaf2attrs, entropy_cutoff
  alns = leaf2attrs.attrs leaves, :aln
  low_ent_cols = Set.new

  alns.transpose.each_with_index do |aln_col, aln_col_idx|
    # Gapped columns are never reported, so skip them before paying for the
    # entropy calculation.
    next if aln_col.include? "-"

    # Upcase once; the same characters feed both the entropy and the result.
    bases = aln_col.map(&:upcase)
    next unless Shannon::entropy(bases.join) <= entropy_cutoff

    # Positions are reported 1-based.
    low_ent_cols << [aln_col_idx + 1, bases.uniq.sort]
  end

  low_ent_cols
end

# Diff context following this method (truncated in this view, kept as found):
#   # @note If there are quoted names in the tree file, they are
#   # unquoted first.
#   def check_ids tree, mapping, aln
#   tree_ids = Set.new(NewickTree.fromFile(tree).unquoted_taxa)