lib/genevalidator/blast.rb in genevalidator-1.6.2 vs lib/genevalidator/blast.rb in genevalidator-1.6.3

- old
+ new

@@ -2,12 +2,12 @@ require 'bio-blastxmlparser' require 'forwardable' require 'genevalidator/exceptions' require 'genevalidator/hsp' -require 'genevalidator/sequences' require 'genevalidator/output' +require 'genevalidator/query' module GeneValidator # Contains methods that run BLAST and methods that analyse sequences class BlastUtils class << self @@ -34,11 +34,11 @@ blastcmd = "#{blast_type} -db '#{db}' -evalue #{EVALUE} -outfmt 5" \ " #{threads}" cmd = "echo \"#{query}\" | #{blastcmd}" - `#{cmd} 2>/dev/null` + `#{cmd} >/dev/null 2>&1` end ## # Runs BLAST on an input file # Params: @@ -52,26 +52,31 @@ def run_blast_on_input_file(input_file = opt[:input_fasta_file], db = opt[:db], seq_type = config[:type], num_threads = opt[:num_threads]) return if opt[:blast_xml_file] || opt[:blast_tabular_file] - $stderr.puts 'Running BLAST' + $stderr.puts 'Running BLAST. This may take a while.' opt[:blast_xml_file] = input_file + '.blast_xml' blast_type = (seq_type == :protein) ? 'blastp' : 'blastx' # -num_threads is not supported on remote databases threads = (opt[:db] !~ /remote/) ? "-num_threads #{num_threads}" : '' blastcmd = "#{blast_type} -query '#{input_file}'" \ " -out '#{opt[:blast_xml_file]}' -db #{db} " \ " -evalue #{EVALUE} -outfmt 5 #{threads}" - `#{blastcmd}` + `#{blastcmd} >/dev/null 2>&1` return unless File.zero?(opt[:blast_xml_file]) - $stderr.puts 'Blast failed to run on the input file. Please ensure that the' - $stderr.puts 'BLAST database exists and try again' - exit 1 + $stderr.puts 'Blast failed to run on the input file.' + if opt[:db] !~ /remote/ + $stderr.puts 'Please ensure that the BLAST database exists and try' + $stderr.puts 'again.' + else + $stderr.puts 'You are using BLAST with a remote database. Please' + $stderr.puts 'ensure that you have internet access and try again.' + end end ## # Parses the next query from the blast xml output query # Params: @@ -84,11 +89,11 @@ iter = iterator.next # parse blast the xml output and get the hits # hits obtained are proteins! (we use only blastp and blastx) iter.each do |hit| - seq = Sequence.new + seq = Query.new seq.length_protein = hit.len.to_i seq.type = :protein seq.identifier = hit.hit_id seq.definition = hit.hit_def @@ -124,11 +129,12 @@ if guess_sequence_type(current_hsp.query_alignment) != :protein fail SequenceTypeError end current_hsp.align_len = hsp.align_len.to_i current_hsp.identity = hsp.identity.to_i - current_hsp.pidentity = (100 * hsp.identity / (hsp.align_len + 0.0)).round(2) + current_hsp.pidentity = (100 * hsp.identity / hsp.align_len.to_f) + .round(2) hsps.push(current_hsp) end seq.hsp_list = hsps @@ -156,13 +162,14 @@ # nil, :nucleotide or :protein def type_of_sequences(fasta_format_string) # the first sequence does not need to have a fasta definition line sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?) # get all sequence types - sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }.uniq.compact + sequence_types = sequences.collect { |seq| guess_sequence_type(seq) } + .uniq.compact return nil if sequence_types.empty? - return sequence_types.first if sequence_types.length == 1 + sequence_types.first if sequence_types.length == 1 end ## # Strips all non-letter characters. guestimates sequence based on that. # If less than 10 useable characters... returns nil