lib/pets/parsers/cohort_parser.rb in pets-0.2.4 vs lib/pets/parsers/cohort_parser.rb in pets-0.2.5
- old
+ new
@@ -1,10 +1,11 @@
class Cohort_Parser
# Entry point: builds a cohort from the input described by options.
# Works in three steps: resolve which columns to extract, read the records,
# then hand the records to create_cohort.
# Side effect: writes the extracted field names into options[:extracted_fields].
# Returns three values: cohort, rejected_terms, rejected_recs.
def self.load(options)
# Hash of column option (:id_col, :ont_col, ...) => column configured for it; unset options are left out
fields2extract = get_fields2extract(options)
field_numbers = fields2extract.values
records = read_records(options, fields2extract, field_numbers)
# Expose the extracted field names so create_cohort can look up optional columns such as :sex_col
+ options[:extracted_fields] = fields2extract.keys
cohort, rejected_terms, rejected_recs = create_cohort(records, options)
return cohort, rejected_terms, rejected_recs
end
def self.read_records(options, fields2extract, field_numbers)
@@ -44,11 +45,11 @@
return records
end
def self.get_fields2extract(options)
fields2extract = {}
- [:id_col, :ont_col, :chromosome_col, :start_col, :end_col].each do |field|
+ [:id_col, :ont_col, :chromosome_col, :start_col, :end_col, :sex_col].each do |field|
col = options[field]
if !col.nil?
col = col.to_i if !options[:header]
fields2extract[field] = col
end
@@ -68,11 +69,11 @@
rejected_recs = []
cohort = Cohort.new()
records.each do |id, record|
rec = record.first
terms = rec.first
- if options[:names]
+ if options[:names] # Translate hpo names 2 codes
init_term_number = terms.length
terms, rec_rejected_terms = ont.translate_names(terms)
if !rec_rejected_terms.empty?
STDERR.puts "WARNING: record #{id} has the unknown term NAMES '#{rec_rejected_terms.join(',')}'. Terms removed."
rejected_terms.concat(rec_rejected_terms)
@@ -85,10 +86,14 @@
if rec.length > 1 # there is genomic region attributes
variants = record.map{|v| v[1..3] }
else
variants = [] # Not exists genomic region attributes so we create a empty array
end
- cohort.add_record([id, terms, check_variants(variants)])
+ other_attr = {}
+ if options[:extracted_fields].include?(:sex_col) # Check for additional attributes. -1 is applied to ignore :id in extracted fields
+ other_attr[:sex] = record.first[options[:extracted_fields].index(:sex_col) -1]
+ end
+ cohort.add_record([id, terms, check_variants(variants)], other_attr)
end
return cohort, rejected_terms.uniq, rejected_recs
end
def self.check_variants(vars)