lib/genevalidator/tabular_parser.rb in genevalidator-1.6.1 vs lib/genevalidator/tabular_parser.rb in genevalidator-1.6.2
- old
+ new
@@ -1,44 +1,48 @@
-require 'genevalidator/exceptions'
require 'csv'
+require 'forwardable'
+require 'genevalidator/sequences'
+require 'genevalidator/hsp'
+require 'genevalidator/exceptions'
+
#
module GeneValidator
# Plain Struct with the positional members listed below.
# NOTE(review): not referenced in the visible part of this file; presumably
# describes one output entry (file plus titles/labels) - confirm with callers.
TabularEntry = Struct.new(:filename, :type, :title, :footer, :xtitle,
:ytitle, :aux1, :aux2)
##
# This class parses the tabular output of BLAST (outfmt 6 & 7)
class TabularParser
# New version: instance-level #opt and #config are forwarded to the
# GeneValidator module; #initialize uses them for its default arguments.
+ extend Forwardable
+ def_delegators GeneValidator, :opt, :config
+
# Enumerator created from tab_results (see #initialize)
attr_reader :rows
# Array with one Hash per parsed row (see #analayse_tabular_file)
attr_reader :tab_results
# Column names derived from the format string passed to #initialize
attr_reader :column_names
# The +type+ argument passed to #initialize, stored unchanged
attr_reader :type
##
# Initializes the parser: derives the column names from +format+, parses the
# tabular file into @tab_results and exposes those rows through the
# enumerator @rows.
# New-version arguments (all optional):
# +tab_file+: path of the tabular file to read
#             (default: opt[:blast_tabular_file])
# +format+: column specification string; every '-' and digit is removed and
#           the rest is split on single spaces or commas
#           (default: opt[:blast_tabular_options])
# +type+: stored as-is in @type (default: config[:type])
- # +file_content+ : String with the tabular BLAST output
- # +format+: format of the tabular output (comma/space delimited string)
- # +type+: :nucleotide or :mrna
- def initialize(filename, format, type)
+ def initialize(tab_file = opt[:blast_tabular_file],
+ format = opt[:blast_tabular_options], type = config[:type])
# Must be set before parsing: @column_names is used as the CSV headers.
@column_names = format.gsub(/[-\d]/, '').split(/[ ,]/)
- @tab_results = analayse_tabular_file(filename)
- @rows = @tab_results.to_enum
@type = type
+ @tab_results = analayse_tabular_file(tab_file)
+ @rows = @tab_results.to_enum
end
##
# Reads the whole file at +filename+ and parses it as tab-separated data,
# skipping lines that match /^#/ and using @column_names as the headers.
# Returns an Array containing one Hash (via to_hash) per parsed row.
def analayse_tabular_file(filename)
- tab_results = []
- file = File.read(filename)
- lines = CSV.parse(file, col_sep: "\t",
- skip_lines: /^#/,
- headers: @column_names)
+ results = []
+ file = File.read(filename)
+ lines = CSV.parse(file, col_sep: "\t", skip_lines: /^#/,
+ headers: @column_names)
lines.each do |line|
- tab_results << line.to_hash
+ results << line.to_hash
end
- tab_results
+ results
end
##
# move to next query
def next
@@ -56,21 +60,23 @@
##
# Builds the hits for the query found at the current position of @rows.
# +query_id+: optional; when given and different from the 'qseqid' of the
#             current row, [] is returned and the position is not advanced.
# Returns whatever initialise_classes returns for the current 'qseqid'
# (NOTE(review): presumably the list of hit sequences - that method is only
# partly visible here), or [] when StopIteration is raised, e.g. by
# @rows.peek once the enumerator is exhausted.
def parse_next(query_id = nil)
current_id = @rows.peek['qseqid']
return [] if !query_id.nil? && current_id != query_id
- hits = @tab_results.partition { |h| h['qseqid'] == current_id }[0]
- hit_seq = initialise_classes(hits)
+ hit_seq = initialise_classes(current_id)
# NOTE(review): move_to_next_query is defined outside the visible text;
# presumably it advances @rows past the rows of current_id - confirm.
move_to_next_query
hit_seq
rescue StopIteration
return []
end
+ private
+
##
#
- def initialise_classes(hits)
+ def initialise_classes(current_id, tab_results = @tab_results)
+ hits = tab_results.partition { |h| h['qseqid'] == current_id }[0]
hit_list = []
grouped_hits = hits.group_by { |row| row['sseqid'] }
grouped_hits.each do |query_id, row|
hit_seq = Sequence.new
@@ -88,10 +94,10 @@
# Creates one Hsp for every row of +hits+ whose 'sseqid' equals
# +current_query_id+ (despite its name, the id is compared with the
# 'sseqid' column) and appends each Hsp to hit_seq.hsp_list.
# The old version also passed +type+ to Hsp#init_tabular_attribute; the new
# version passes only the row.
def initialise_all_hsps(current_query_id, hits, hit_seq)
hsps = hits.select { |row| row['sseqid'] == current_query_id }
hsps.each do |row|
hsp = Hsp.new
- hsp.init_tabular_attribute(row, type)
+ hsp.init_tabular_attribute(row)
hit_seq.hsp_list.push(hsp)
end
end
end
end