lib/rbbt/sources/organism.rb in rbbt-sources-1.2.0 vs lib/rbbt/sources/organism.rb in rbbt-sources-2.0.0

- old
+ new

# NOTE(review): what follows is a rendered diff of lib/rbbt/sources/organism.rb (rbbt-sources 1.2.0 -> 2.0.0),
# not runnable Ruby. Text after a "-" marker was removed, text after a "+" marker was added, and unmarked text
# is unchanged context. The two "@@" headers delimit hunks: new-file lines 83-112 (the middle of
# attach_translations) are not shown, so that method is incomplete in this view.
#
# Changes visible in the first hunk:
#   * The require of 'rbbt/resource/with_key' is dropped.
#   * The hard-coded list ["Hsa", "Mmu", "Rno", "Sce"] is replaced by Organism.installable_organisms, which
#     globs '???' under Rbbt.share.install.Organism and returns the basenames of the matches.
#   * Rbbt.software.opt.bin.liftOver is claimed from a UCSC download URL.
#   * hg_build(organism) raises unless the organism string matches /^Hsa/; returns 'hg19' when the string has
#     no "/"; otherwise looks up the part after the "/" in Ensembl.releases and returns 'hg19' when the number
#     left after stripping everything up to the last "-" of the release is greater than 54, else 'hg18'.
#   * liftOver(positions, source, target) returns positions untouched unless the builds are hg19 -> hg18 or
#     hg18 -> hg19. Otherwise it writes the positions as BED rows ("chr"-prefixed chromosome, pos - 1, pos,
#     original string), downloads the matching chain file, runs the liftOver binary, and rebuilds each
#     position from the output's first and third columns plus whatever followed chr:pos in the original
#     string. The return value is positions mapped through that lookup.
#
# NOTE(review): FileUtils.chmod(755, ...) passes a decimal integer; file modes are normally written in octal
#   (0755) - confirm the intended permissions.
# NOTE(review): new_mutations is assigned but never read in the visible code.
# NOTE(review): "chr" << chr appends to a string literal, which would raise if frozen string literals were
#   enabled for this file - not visible here.
# NOTE(review): positions absent from the liftOver output get no entry in new_positions, so the final collect
#   presumably yields nil for them - verify that callers expect this.
# NOTE(review): hg_build calls release.sub without checking that Ensembl.releases[date] returned a value.
# NOTE(review): Open.read(target_bed) with a block presumably yields one line at a time - confirm against Open.
#
# Changes visible in the second hunk:
#   * The first of two guess_id definitions (the one taking an identifiers argument) is removed; the remaining
#     one sorts the result of TSV.field_match_counts by count.to_i and returns the last entry.
#   * name(organism) is replaced by scientific_name(organism), which reads
#     Organism[organism]["scientific_name"] after calling produce; organism(name) is updated to call it.
#   * known_ids(name) returns all_fields from a TSV::Parser opened on the organism's identifiers.
#   * entrez_taxid_organism(taxid) returns the first installable organism whose entrez_taxids content, split on
#     newlines, includes taxid.to_s, and raises when none does.
#
# NOTE(review): organism(name) interpolates name into a regex without escaping it.
# NOTE(review): the raise message reads "Supported organism are" - likely meant "organisms".
@@ -1,20 +1,82 @@ require 'rbbt' require 'rbbt/resource' -require 'rbbt/resource/with_key' module Organism extend Resource self.pkgdir = "rbbt" self.subdir = "share/organisms" - ["Hsa", "Mmu", "Rno", "Sce"].each do |organism| + def self.installable_organisms + Rbbt.share.install.Organism.find.glob('???').collect{|f| File.basename(f)} + end + + + Organism.installable_organisms.each do |organism| claim Organism[organism], :rake, Rbbt.share.install.Organism[organism].Rakefile.find module_eval "#{ organism } = with_key '#{organism}'" end + Rbbt.claim Rbbt.software.opt.bin.liftOver, :url, "http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/liftOver" + + def self.hg_build(organism) + require 'rbbt/sources/ensembl_ftp' + + raise "Only organism 'Hsa' (Homo sapiens) supported" unless organism =~ /^Hsa/ + + return 'hg19' unless organism =~ /\// + date = organism.split("/")[1] + + release = Ensembl.releases[date] + + release.sub(/.*-/,'').to_i > 54 ? 'hg19' : 'hg18' + end + + def self.liftOver(positions, source, target) + + source_hg = hg_build(source) + target_hg = hg_build(target) + + case + when (source_hg == 'hg19' and target_hg == 'hg18') + map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg18.over.chain.gz" + when (source_hg == 'hg18' and target_hg == 'hg19') + map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz" + else + return positions + end + + positions_bed = positions.collect{|position| chr, pos = position.split(":").values_at(0,1); ["chr" << chr, pos.to_i-1, pos, position] * "\t"} * "\n" + "\n" + new_positions = {} + + TmpFile.with_file(positions_bed) do |source_bed| + TmpFile.with_file() do |unmapped_file| + TmpFile.with_file() do |map_file| + + + Open.write(map_file, Open.read(map_url)) + new_mutations = TmpFile.with_file() do |target_bed| + FileUtils.chmod(755, Rbbt.software.opt.bin.liftOver.produce.find) + CMD.cmd("#{Rbbt.software.opt.bin.liftOver.find} '#{source_bed}' '#{map_file}' 
'#{target_bed}' '#{unmapped_file}'").read + Open.read(target_bed) do |line| + chr, position_alt, position, name = line.chomp.split("\t") + chr.sub! /chr/, '' + + old_chr, old_position, *rest = name.split(":") + new_positions[name] = ([chr, position].concat rest) * ":" + end + end + end + end + end + + positions.collect do |position| + new_positions[position] + end + end + class OrganismNotProcessedError < StandardError; end def self.attach_translations(org, tsv, target = nil, fields = nil, options = {}) Log.high "Attaching Translations for #{ org.inspect }, target #{target.inspect}, fields #{fields.inspect}" options = Misc.add_defaults options, :persist => true, :case_insensitive => false @@ -51,32 +113,38 @@ index[list].first end end end - def self.guess_id(org, values, identifiers = nil) - identifiers ||= TSV.setup(Organism.identifiers(org), :persist => true) - field_matches = identifiers.field_matches(values) - field_matches.sort_by{|field, matches| matches.uniq.length}.last - end - def self.guess_id(org, values) field_matches = TSV.field_match_counts(Organism.identifiers(org).find, values) field_matches.sort_by{|field, count| count.to_i}.last end - def self.organisms Dir.glob(File.join(Organism.root.find, '*')).collect{|f| File.basename(f)} end - def self.name(organism) - Organism.scientific_name(organism).read.strip + def self.scientific_name(organism) + Organism[organism]["scientific_name"].produce.read.strip end def self.organism(name) organisms.select{|organism| - organism == name or Organism.name(organism) =~ /#{ name }/i + organism == name or Organism.scientific_name(organism) =~ /#{ name }/i }.first end + def self.known_ids(name) + TSV::Parser.new(Organism.identifiers(name).open).all_fields + end + + def self.entrez_taxid_organism(taxid) + all_organisms = Organism.installable_organisms + + all_organisms.each do |organism| + return organism if Organism.entrez_taxids(organism).read.split("\n").include? 
taxid.to_s + end + + raise "No organism identified for taxid #{taxid}. Supported organism are: #{all_organisms * ", "}" + end end