lib/genevalidator/tabular_parser.rb in genevalidator-1.6.1 vs lib/genevalidator/tabular_parser.rb in genevalidator-1.6.2

- old
+ new

@@ -1,44 +1,48 @@
-require 'genevalidator/exceptions'
 require 'csv'
+require 'forwardable'
+require 'genevalidator/sequences'
+require 'genevalidator/hsp'
+require 'genevalidator/exceptions'
+
 #
 module GeneValidator
   TabularEntry = Struct.new(:filename, :type, :title, :footer, :xtitle, :ytitle, :aux1, :aux2)
   ##
   # This class parses the tabular output of BLAST (outfmt 6 & 7)
   class TabularParser
+    extend Forwardable
+    def_delegators GeneValidator, :opt, :config
+
     attr_reader :rows
     attr_reader :tab_results
     attr_reader :column_names
     attr_reader :type
     ##
     # Initializes the object
-    # +file_content+ : String with the tabular BLAST output
-    # +format+: format of the tabular output (comma/space delimited string)
-    # +type+: :nucleotide or :mrna
-    def initialize(filename, format, type)
+    def initialize(tab_file = opt[:blast_tabular_file],
+                   format = opt[:blast_tabular_options], type = config[:type])
       @column_names = format.gsub(/[-\d]/, '').split(/[ ,]/)
-      @tab_results = analayse_tabular_file(filename)
-      @rows = @tab_results.to_enum
       @type = type
+      @tab_results = analayse_tabular_file(tab_file)
+      @rows = @tab_results.to_enum
     end
     ##
     #
     def analayse_tabular_file(filename)
-      tab_results = []
-      file = File.read(filename)
-      lines = CSV.parse(file, col_sep: "\t",
-                        skip_lines: /^#/,
-                        headers: @column_names)
+      results = []
+      file = File.read(filename)
+      lines = CSV.parse(file, col_sep: "\t", skip_lines: /^#/,
+                        headers: @column_names)
       lines.each do |line|
-        tab_results << line.to_hash
+        results << line.to_hash
       end
-      tab_results
+      results
     end
     ##
     # move to next query
     def next
@@ -56,21 +60,23 @@
     ##
     #
     def parse_next(query_id = nil)
       current_id = @rows.peek['qseqid']
       return [] if !query_id.nil? && current_id != query_id
-      hits = @tab_results.partition { |h| h['qseqid'] == current_id }[0]
-      hit_seq = initialise_classes(hits)
+      hit_seq = initialise_classes(current_id)
       move_to_next_query
       hit_seq
     rescue StopIteration
       return []
     end
+    private
+
     ##
     #
-    def initialise_classes(hits)
+    def initialise_classes(current_id, tab_results = @tab_results)
+      hits = tab_results.partition { |h| h['qseqid'] == current_id }[0]
       hit_list = []
       grouped_hits = hits.group_by { |row| row['sseqid'] }
       grouped_hits.each do |query_id, row|
         hit_seq = Sequence.new
@@ -88,10 +94,10 @@
     #
     def initialise_all_hsps(current_query_id, hits, hit_seq)
       hsps = hits.select { |row| row['sseqid'] == current_query_id }
       hsps.each do |row|
         hsp = Hsp.new
-        hsp.init_tabular_attribute(row, type)
+        hsp.init_tabular_attribute(row)
         hit_seq.hsp_list.push(hsp)
       end
     end
   end
 end