lib/tree_clusters.rb in tree_clusters-0.5.1 vs lib/tree_clusters.rb in tree_clusters-0.5.2

- old
+ new

@@ -20,10 +20,14 @@ @root.descendants.each do |clade| clades.push(clade) if (!clade.children.empty?) end return clades end + + def unquoted_taxa + self.taxa.map { |str| str.tr %q{"'}, "" } + end end # Top level namespace of the Gem. module TreeClusters @@ -80,12 +84,14 @@ end Set.new low_ent_cols end + # @note If there are quoted names in the tree file, they are + # unquoted first. def check_ids tree, mapping, aln - tree_ids = Set.new(NewickTree.fromFile(tree).taxa) + tree_ids = Set.new(NewickTree.fromFile(tree).unquoted_taxa) mapping_ids = Set.new File.open(mapping, "rt").each_line.with_index do |line, idx| unless idx.zero? id, *rest = line.chomp.split @@ -141,12 +147,15 @@ reverse metadata.each do |md_cat, leaf2mdtag| already_checked = Set.new single_tag_clades = {} + p [md_cat, leaf2mdtag] clades.each do |clade| + p [clade.name, clade.all_leaves] + assert clade.all_leaves.count > 1, "A clade cannot also be a leaf" unless clade.all_leaves.all? do |leaf| already_checked.include? leaf @@ -171,11 +180,11 @@ end end end single_tag_clades.each do |clade, md_tag| - non_clade_leaves = tree.taxa - clade.all_leaves + non_clade_leaves = tree.unquoted_taxa - clade.all_leaves non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf| [leaf, leaf2mdtag[leaf]] end.select { |ary| ary.last == md_tag } @@ -286,14 +295,19 @@ :non_parent_leaves, :other_leaves, :single_tag_info, :all_tags + # @note If a node name is quoted, then those quotes are removed + # first. + # # @param node [NewickNode] a NewickNode from a NewickTree # @param tree [NewickTree] a NewickTree def initialize node, tree, metadata=nil - @name = node.name + tree_taxa = tree.unquoted_taxa + + @name = unquote node.name @all_leaves = descendant_leaves node if (children = node.children).count == 2 lchild, rchild = node.children @@ -315,14 +329,14 @@ assert parent, "Noge #{node.name} has no parent. Is it the root?" @parent_leaves = descendant_leaves parent @other_leaves = - Object::Set.new(tree.taxa) - Object::Set.new(all_leaves) + Object::Set.new(tree_taxa) - Object::Set.new(all_leaves) @non_parent_leaves = - Object::Set.new(tree.taxa) - Object::Set.new(parent_leaves) + Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves) if metadata @metadata = metadata @all_tags ||= get_all_tags @single_tag_info ||= get_single_tag_info @@ -343,11 +357,12 @@ self.right_leaves == clade.right_leaves && self.all_sibling_leaves == clade.all_sibling_leaves && self.each_sibling_leaf_set == clade.each_sibling_leaf_set && self.parent_leaves == clade.parent_leaves && self.other_leaves == clade.other_leaves && - self.single_tag_info == clade.single_tag_info + self.single_tag_info == clade.single_tag_info && + self.all_tags == clade.all_tags ) end # Alias for == def eql? clade @@ -377,16 +392,21 @@ end.to_h end def descendant_leaves node if node.leaf? - [node.name] + [unquote(node.name)] else node. descendants. flatten. uniq. - select { |node| node.leaf? }.map(&:name) + select { |node| node.leaf? }. + map { |node| unquote(node.name) } end + end + + def unquote str + str.tr %q{"'}, "" end end end