lib/genevalidator/blast.rb in genevalidator-1.6.2 vs lib/genevalidator/blast.rb in genevalidator-1.6.3
- old
+ new
@@ -2,12 +2,12 @@
require 'bio-blastxmlparser'
require 'forwardable'
require 'genevalidator/exceptions'
require 'genevalidator/hsp'
-require 'genevalidator/sequences'
require 'genevalidator/output'
+require 'genevalidator/query'
module GeneValidator
# Contains methods that run BLAST and methods that analyse sequences
class BlastUtils
class << self
@@ -34,11 +34,11 @@
blastcmd = "#{blast_type} -db '#{db}' -evalue #{EVALUE} -outfmt 5" \
" #{threads}"
cmd = "echo \"#{query}\" | #{blastcmd}"
- `#{cmd} 2>/dev/null`
+ `#{cmd} >/dev/null 2>&1`
end
##
# Runs BLAST on an input file
# Params:
@@ -52,26 +52,31 @@
def run_blast_on_input_file(input_file = opt[:input_fasta_file],
db = opt[:db], seq_type = config[:type],
num_threads = opt[:num_threads])
return if opt[:blast_xml_file] || opt[:blast_tabular_file]
- $stderr.puts 'Running BLAST'
+ $stderr.puts 'Running BLAST. This may take a while.'
opt[:blast_xml_file] = input_file + '.blast_xml'
blast_type = (seq_type == :protein) ? 'blastp' : 'blastx'
# -num_threads is not supported on remote databases
threads = (opt[:db] !~ /remote/) ? "-num_threads #{num_threads}" : ''
blastcmd = "#{blast_type} -query '#{input_file}'" \
" -out '#{opt[:blast_xml_file]}' -db #{db} " \
" -evalue #{EVALUE} -outfmt 5 #{threads}"
- `#{blastcmd}`
+ `#{blastcmd} >/dev/null 2>&1`
return unless File.zero?(opt[:blast_xml_file])
- $stderr.puts 'Blast failed to run on the input file. Please ensure that the'
- $stderr.puts 'BLAST database exists and try again'
- exit 1
+ $stderr.puts 'Blast failed to run on the input file.'
+ if opt[:db] !~ /remote/
+ $stderr.puts 'Please ensure that the BLAST database exists and try'
+ $stderr.puts 'again.'
+ else
+ $stderr.puts 'You are using BLAST with a remote database. Please'
+ $stderr.puts 'ensure that you have internet access and try again.'
+ end
end
##
# Parses the next query from the blast xml output query
# Params:
@@ -84,11 +89,11 @@
iter = iterator.next
# parse blast the xml output and get the hits
# hits obtained are proteins! (we use only blastp and blastx)
iter.each do |hit|
- seq = Sequence.new
+ seq = Query.new
seq.length_protein = hit.len.to_i
seq.type = :protein
seq.identifier = hit.hit_id
seq.definition = hit.hit_def
@@ -124,11 +129,12 @@
if guess_sequence_type(current_hsp.query_alignment) != :protein
fail SequenceTypeError
end
current_hsp.align_len = hsp.align_len.to_i
current_hsp.identity = hsp.identity.to_i
- current_hsp.pidentity = (100 * hsp.identity / (hsp.align_len + 0.0)).round(2)
+ current_hsp.pidentity = (100 * hsp.identity / hsp.align_len.to_f)
+ .round(2)
hsps.push(current_hsp)
end
seq.hsp_list = hsps
@@ -156,13 +162,14 @@
# nil, :nucleotide or :protein
def type_of_sequences(fasta_format_string)
# the first sequence does not need to have a fasta definition line
sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?)
# get all sequence types
- sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }.uniq.compact
+ sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
+ .uniq.compact
return nil if sequence_types.empty?
- return sequence_types.first if sequence_types.length == 1
+ sequence_types.first if sequence_types.length == 1
end
##
# Strips all non-letter characters. guestimates sequence based on that.
# If less than 10 useable characters... returns nil