lib/tree_clusters.rb in tree_clusters-0.2.0 vs lib/tree_clusters.rb in tree_clusters-0.3.0
- old
+ new
@@ -30,18 +30,97 @@
# @param tree [NewickTree] a NewickTree object
#
# @yieldparam clade [Clade] a clade of the tree
#
# @return [Enumerator<Clade>] enumerator of Clade objects
- def all_clades tree
- return enum_for(:all_clades, tree) unless block_given?
+ def all_clades tree, metadata=nil
+ return enum_for(:all_clades, tree, metadata) unless block_given?
tree.clade_nodes.reverse.each do |node|
- yield Clade.new node, tree
+ yield Clade.new node, tree, metadata
end
end
+ def snazzy_clades tree, metadata
+ snazzy_clades = {}
+
+ clades = self.
+ all_clades(tree, metadata).
+ sort_by { |clade| clade.all_leaves.count }.
+ reverse
+
+ metadata.each do |md_cat, leaf2mdtag|
+ already_checked = Set.new
+ single_tag_clades = {}
+
+ clades.each do |clade|
+ assert clade.all_leaves.count > 1,
+ "A clade cannot also be a leaf"
+
+ unless clade.all_leaves.all? do |leaf|
+ already_checked.include? leaf
+ end
+ md_tags = clade.all_leaves.map do |leaf|
+ assert leaf2mdtag.has_key?(leaf),
+ "leaf #{leaf} is missing from leaf2mdtag ht"
+
+ leaf2mdtag[leaf]
+ end
+
+ # this clade is mono-phyletic w.r.t. this metadata category.
+ if md_tags.uniq.count == 1
+ clade.all_leaves.each do |leaf|
+ already_checked << leaf
+ end
+
+ assert !single_tag_clades.has_key?(clade),
+ "clade #{clade.name} is repeated in single_tag_clades for #{md_cat}"
+
+ single_tag_clades[clade] = md_tags.first
+ end
+ end
+ end
+
+ single_tag_clades.each do |clade, md_tag|
+ non_clade_leaves = tree.taxa - clade.all_leaves
+
+ non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
+ [leaf, leaf2mdtag[leaf]]
+ end.select { |ary| ary.last == md_tag }
+
+ if non_clade_leaves_with_this_md_tag.count.zero?
+ if snazzy_clades.has_key? clade
+ snazzy_clades[clade][md_cat] = md_tag
+ else
+ snazzy_clades[clade] = { md_cat => md_tag }
+ end
+ end
+ end
+ end
+
+ snazzy_clades
+ end
+
+ def read_mapping_file fname
+ md_cat_names = nil
+ metadata = TreeClusters::Attrs.new
+
+ File.open(fname, "rt").each_line.with_index do |line, idx|
+ leaf_name, *metadata_vals = line.chomp.split "\t"
+
+ if idx.zero?
+ md_cat_names = metadata_vals
+ else
+ metadata_vals.each_with_index do |val, val_idx|
+ metadata.add md_cat_names[val_idx], leaf_name, val
+ end
+ end
+ end
+
+ metadata
+ end
+
# A Hash table for genome/leaf/taxa attributes
class Attrs < Hash
# Returns the an AttrArray of Sets for the given genomes and
# attribute.
@@ -111,15 +190,17 @@
:right_leaves,
:all_sibling_leaves,
:each_sibling_leaf_set,
:parent_leaves,
:non_parent_leaves,
- :other_leaves
+ :other_leaves,
+ :single_tag_info,
+ :all_tags
# @param node [NewickNode] a NewickNode from a NewickTree
# @param tree [NewickTree] a NewickTree
- def initialize node, tree
+ def initialize node, tree, metadata=nil
@name = node.name
@all_leaves = descendant_leaves node
if (children = node.children).count == 2
lchild, rchild = node.children
@@ -146,10 +227,18 @@
@other_leaves =
Object::Set.new(tree.taxa) - Object::Set.new(all_leaves)
@non_parent_leaves =
Object::Set.new(tree.taxa) - Object::Set.new(parent_leaves)
+
+ if metadata
+ @metadata = metadata
+ @all_tags ||= get_all_tags
+ @single_tag_info ||= get_single_tag_info
+ else
+ @single_tag_info = nil
+ end
end
# Compares two Clades field by field.
#
# If all instance variables are == than the two clades are == as
@@ -161,19 +250,41 @@
self.left_leaves == clade.left_leaves &&
self.right_leaves == clade.right_leaves &&
self.all_sibling_leaves == clade.all_sibling_leaves &&
self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
self.parent_leaves == clade.parent_leaves &&
- self.other_leaves == clade.other_leaves
+ self.other_leaves == clade.other_leaves &&
+ self.single_tag_info == clade.single_tag_info
)
end
# Alias for ==
def eql? clade
self == clade
end
private
+
+ def get_single_tag_info
+ @all_tags.map do |md_cat, set|
+ [md_cat, set.count == 1 ? set.to_a.first : nil]
+ end.to_h
+ end
+
+ def get_all_tags
+ # name2tag has leaf names => metadata tag and is an Attrs
+ @metadata.map do |md_cat, name2tag|
+ tag_info = self.all_leaves.map do |leaf|
+ assert name2tag.has_key?(leaf),
+ "leaf #{leaf} is not present in name2tag ht for " +
+ "md_cat #{md_cat}"
+
+ name2tag[leaf]
+ end
+
+ [md_cat, Set.new(tag_info)]
+ end.to_h
+ end
def descendant_leaves node
if node.leaf?
[node.name]
else