#!/usr/bin/env ruby

# For every "snazzy" clade reported by TreeClusters.snazzy_clades, writes
# five tab-separated report files into --outdir:
#
#   BASE.snazzy_clades.txt
#       clade id, number of metadata pairs, "category|tag" pairs
#   BASE.snazzy_clades_clade_members.txt
#       clade id, number of leaves, the leaves
#   BASE.snazzy_clades_key_cols.txt
#       low-entropy columns computed over the clade's leaves
#   BASE.snazzy_clades_key_cols_minus_parent_cols.txt
#       those columns minus the low-entropy columns of the parent's leaves
#   BASE.snazzy_clades_key_cols_minus_sibling_cols.txt
#       those columns minus the low-entropy columns of the sibling leaves

# Exit instead of raising when the reader of our stdout goes away.
Signal.trap("PIPE", "EXIT")

require "tree_clusters"
require "trollop"
require "parse_fasta"
require "shannon"
require "fileutils"

TreeClusters.extend TreeClusters

# Writes one tab-separated record: the clade id, the number of key
# columns, and then the key columns themselves.
#
# @param outf [#puts] open output handle
# @param clade_id [String] identifier written in the first field
# @param key_cols [#count, #to_a] collection of key columns
def puts_info(outf, clade_id, key_cols)
  outf.puts [clade_id, key_cols.count, key_cols.to_a].join("\t")
end

opts = Trollop.options do
  version TreeClusters::VERSION

  banner <<-EOS

  Note that if a clade's parent would be the root of the tree, no
  columns will be subtracted when removing the parent columns as it
  would be the entire alignment.

  Options:
  EOS

  opt(:tree,
      "Newick tree file",
      type: :string)
  opt(:mapping,
      "Mapping file",
      type: :string)
  opt(:aln,
      "Alignment file",
      type: :string)
  opt(:entropy_cutoff,
      "Cutoff to consider a column low entropy",
      default: 0.0)
  opt(:clade_size_cutoff,
      "Consider only clades with at least this many leaves",
      default: 1)
  opt(:outdir,
      "Output directory",
      default: ".")
  opt(:base,
      "Basename for output",
      default: "snazzy_clades")
end

# NOTE(review): abort_if / abort_unless / abort_unless_file_exists are not
# defined in this file -- presumably supplied by one of the required
# libraries; confirm before changing the requires.
abort_if opts[:tree].nil?,
         "--tree is a required arg"
abort_if opts[:mapping].nil?,
         "--mapping is a required arg"
abort_if opts[:aln].nil?,
         "--aln is a required arg"

abort_unless_file_exists opts[:tree]
abort_unless_file_exists opts[:mapping]
abort_unless_file_exists opts[:aln]

TreeClusters.check_ids opts[:tree], opts[:mapping], opts[:aln]

abort_unless opts[:entropy_cutoff] >= 0,
             "--entropy-cutoff must be >= 0"
abort_unless opts[:clade_size_cutoff] >= 1,
             "--clade-size-cutoff must be >= 1"

FileUtils.mkdir_p opts[:outdir]

tree          = NewickTree.fromFile opts[:tree]
metadata      = TreeClusters.read_mapping_file opts[:mapping]
snazzy_clades = TreeClusters.snazzy_clades tree, metadata
leaf2attrs    = TreeClusters.read_alignment opts[:aln]

# Builds "OUTDIR/BASE.<suffix>.txt".
out_path = lambda do |suffix|
  File.join opts[:outdir], "#{opts[:base]}.#{suffix}.txt"
end

# Insertion order is the order in which the files get opened below.
out_fnames = {
  info:          out_path.call("snazzy_clades"),
  members:       out_path.call("snazzy_clades_clade_members"),
  key_cols:      out_path.call("snazzy_clades_key_cols"),
  minus_parent:  out_path.call("snazzy_clades_key_cols_minus_parent_cols"),
  minus_sibling: out_path.call("snazzy_clades_key_cols_minus_sibling_cols"),
}

out_files = {}

begin
  # Open inside the begin so that a failure on a later file still closes
  # the handles that were already opened.
  out_fnames.each do |key, fname|
    out_files[key] = File.open(fname, "w")
  end

  # info is { metadata_category => metadata_tag , ... }
  snazzy_clades.each_with_index do |(clade, info), idx|
    clade_id   = "clade_#{idx+1}___#{clade.name}"
    all_leaves = clade.all_leaves

    out_files[:info].puts [clade_id,
                           info.count,
                           info.map { |pair| pair.join("|") }].join("\t")

    out_files[:members].puts [clade_id,
                              all_leaves.count,
                              all_leaves].join("\t")

    key_cols_all_leaves =
      TreeClusters.low_ent_cols all_leaves,
                                leaf2attrs,
                                opts[:entropy_cutoff]
    key_cols_all_sibling_leaves =
      TreeClusters.low_ent_cols clade.all_sibling_leaves,
                                leaf2attrs,
                                opts[:entropy_cutoff]
    key_cols_parent_leaves =
      TreeClusters.low_ent_cols clade.parent_leaves,
                                leaf2attrs,
                                opts[:entropy_cutoff]

    key_cols_all_minus_sibling =
      key_cols_all_leaves - key_cols_all_sibling_leaves
    key_cols_all_minus_parent =
      key_cols_all_leaves - key_cols_parent_leaves

    puts_info out_files[:key_cols],
              clade_id,
              key_cols_all_leaves
    puts_info out_files[:minus_parent],
              clade_id,
              key_cols_all_minus_parent
    puts_info out_files[:minus_sibling],
              clade_id,
              key_cols_all_minus_sibling
  end
ensure
  out_files.each_value(&:close)
end