# Rake script that generates the organism files for Saccharomyces cerevisiae:
# 'name', 'lexicon' and 'identifiers'. The default task builds all three.
#
# tsv_file, merge_entrez and merge_biomart are not defined in this file;
# presumably they come from lib/helpers (required below) -- confirm there.

here = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(here, '..', '..', '..', '..', 'lib'))

require 'rbbt/sources/biomart'
require 'rbbt/sources/entrez'
require File.join(here, '../../lib/helpers')

# Organism-wide settings shared by the tasks below.
$taxs         = [559292, 4932]
$native       = "SGD ID"
$url          = "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab"
$biomart_db   = 'scerevisiae_gene_ensembl'
$biomart_main = ['Entrez Gene ID', 'entrezgene']

# Rewrites the first "SGD:S0" occurrence in a code to "S0".
fix_sgd_code = proc { |code| code.sub(/SGD:S0/, 'S0') }

# Truthy when the given text contains a tab immediately followed by "S0".
check_sgd_code = proc { |code| code.match(/\tS0/) }

# Loads the SGD features table from $url and merges the Entrez data into it.
# The positional arguments are passed to the helpers unchanged; their exact
# meaning (e.g. [3, 4, 5] presumably being extra column indexes) should be
# confirmed against the helper definitions.
load_sgd_table = lambda do
  table = tsv_file($url, [$native, 0], [3, 4, 5], :keep_empty => true)
  merge_entrez(table, $taxs, $native, fix_sgd_code, check_sgd_code)
  table
end

# Writes +content+ to +path+ using IO#puts, replacing any existing file.
write_out = lambda do |path, content|
  File.open(path, 'w') { |out| out.puts content }
end

# 'name': the organism name on a single line.
file 'name' do |t|
  write_out.call(t.name, "Saccharomyces cerevisiae")
end

# 'lexicon': SGD table merged with Entrez and the BioMart Interpro
# description, with the "Entrez Gene ID" field removed before writing.
file 'lexicon' do |t|
  table = load_sgd_table.call
  merge_biomart(table, $biomart_db, $biomart_main,
                [['Interpro Description' , "interpro_description"]])

  kept_fields = table.fields - ["Entrez Gene ID"]
  table = table.slice(kept_fields)

  write_out.call(t.name, table)
end

# 'identifiers': SGD table merged with Entrez and a set of BioMart
# identifier attributes.
file 'identifiers' do |t|
  biomart_attributes = [
    ['Associated Gene Name' , "external_gene_id"],
    ['Ensembl Gene ID', "ensembl_gene_id" ],
    ['Ensembl Protein ID', "ensembl_peptide_id" ],
    ['RefSeq Protein ID' , "refseq_peptide"] ,
    ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
    ['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
    ['Protein ID' , "protein_id"] ,
    ['EMBL (Genbank) ID' , "embl"] ,
    # Affymetrix
    ['Affy yeast 2',"affy_yeast_2"],
    ['Affy yg s98', "affy_yg_s98"]
  ]

  table = load_sgd_table.call
  merge_biomart(table, $biomart_db, $biomart_main, biomart_attributes)

  write_out.call(t.name, table)
end

task :default => ['name', 'lexicon', 'identifiers']

# ---------------------------------------------------------------------------
# Commented-out configuration retained from the original file (it targets a
# 'rake-include' script instead of the tasks above). Not executed.
# ---------------------------------------------------------------------------
#require __FILE__.sub(/[^\/]*$/,'') + '../rake-include'
#
#$name = "Saccharomyces cerevisiae"
#
#
#$native_id = "SGD DB Id"
#
#$entrez2native = {
#  :tax => 559292,
#  :fix => proc{|code| code.sub(/SGD:S0/,'S0') },
#  :check => proc{|code| code.match(/^S0/)},
#}
#
#$lexicon = {
#  :file => {
#    :url => "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab",
#    :native => 0,
#    :extra => [4,3,5]
#  },
#  :biomart => {
#    :database => 'scerevisiae_gene_ensembl',
#    :main => ['Entrez Gene ID', 'entrezgene'],
#    :extra => [
#      ['Interpro Description' , "interpro_description"],
#    ],
#    :filter => [],
#  }
#
#}
#
#$identifiers = {
#  :file => {
#    :url => "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab",
#    :native => 0,
#    :extra => [],
#  },
#  :biomart => {
#    :database => 'scerevisiae_gene_ensembl',
#    :main => ['Entrez Gene ID', 'entrezgene'],
#    :extra => [
#      ['Associated Gene Name' , "external_gene_id"],
#      ['Ensembl Gene ID', "ensembl_gene_id" ],
#      ['Ensembl Protein ID', "ensembl_peptide_id" ],
#      ['RefSeq Protein ID' , "refseq_peptide"] ,
#      ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
#      ['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
#      ['Protein ID' , "protein_id"] ,
#      ['EMBL (Genbank) ID' , "embl"] ,
#      # Affymetrix
#      ['Affy yeast 2',"affy_yeast_2"],
#      ['Affy yg s98', "affy_yg_s98"],
#    ],
#    :filter => [],
#  }
#}
#
#$go = {
#  :url => "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/literature_curation/gene_association.sgd.gz",
#  :code => 1,
#  :go => 4,
#  :pmid => 5,
#}
#
#$query = '"saccharomyces cerevisiae"[All Fields] AND ((("proteins"[TIAB] NOT Medline[SB]) OR "proteins"[MeSH Terms] OR protein[Text Word]) OR (("genes"[TIAB] NOT Medline[SB]) OR "genes"[MeSH Terms] OR gene[Text Word])) AND hasabstract[text] AND English[lang]'
#
#