require 'genevalidator/sequences'
require 'genevalidator/exceptions'
require 'bio-blastxmlparser'
require 'net/http'
require 'open-uri'
require 'uri'
require 'io/console'
require 'yaml'

module GeneValidator
  # Gets the raw sequences for each hit in a BLAST output file
  module GetRawSequences
    # NOTE(review): this copy of the file is damaged. The text between
    # `class <` and `([\w\W\d]+)})[0][0]` on the next line is missing, so the
    # line is not valid Ruby as it stands. It presumably opened a singleton
    # class and held further methods, plus the header and first statements of
    # the method whose tail follows. Restore it from version control before
    # changing any code here.
    class <([\w\W\d]+)})[0][0]
      # Tail of a method whose `def` line is not visible. `result` and `query`
      # are presumably assigned in the missing part -- TODO confirm.
      #
      # NOTE(review): the WebEnv pattern below has no closing delimiter after
      # the capture group. Because `[\w\W\d]+` is greedy and matches every
      # character (newlines included), the group runs to the end of `result`.
      # A closing-tag terminator may have been lost with the text above --
      # confirm against the upstream file.
      web_env = result.scan(%r{<\bWebEnv\b>([\w\W\d]+)})[0][0]
      # Build the efetch request: one FASTA record, as plain text, from the
      # protein database, addressed by the query key / WebEnv pair.
      uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' \
            'rettype=fasta&retmode=text&retstart=0&retmax=1&' \
            "db=protein&query_key=#{query}&WebEnv=#{web_env}"
      result = Net::HTTP.get(URI.parse(uri))
      # Keep everything except the final character of the response
      # (presumably a trailing newline -- confirm).
      raw_seqs = result[0..result.length - 2]
      # Any case-insensitive occurrence of "error" in the response body is
      # treated as a failed fetch: print a message and call `exit 1`.
      unless raw_seqs.downcase.index(/error/).nil?
        puts '*** Error: There was an error in obtaining the raw sequence' \
             ' of a BLAST hit. Please ensure that you have internet access.'
        exit 1
      end
      # The fetched text is the method's return value.
      raw_seqs
    end

    # Checks that the first element of +rows+ has as many entries as
    # +table_headers+.
    #
    # rows          - enumerable whose elements respond to +length+.
    # table_headers - object responding to +length+.
    #
    # On a mismatch it prints an error and calls `exit 1`. Only the first row
    # is examined; an empty +rows+ passes without any check. The return value
    # is not meaningful.
    #
    # NOTE(review): the message reads "This is could possibly be due to an
    # incorrect BLAST tabular options" -- a grammar slip in a user-facing
    # string, left untouched here because this edit changes comments only.
    def assert_table_has_correct_no_of_collumns(rows, table_headers)
      rows.each do |row|
        unless row.length == table_headers.length
          puts '*** Error: The BLAST tabular file cannot be parsed. This is' \
               ' could possibly be due to an incorrect BLAST tabular' \
               ' options ("-o", "--blast_tabular_options") being supplied.' \
               ' Please correct this and try again.'
          exit 1
        end
        break # stop after the first row; later rows are not checked
      end
    end
  end
  end
end