lib/pets/io.rb in pets-0.2.4 vs lib/pets/io.rb in pets-0.2.5

- old
+ new

# The diff hunks of this excerpt were collapsed onto one line. The complete
# definitions of the new (+) side are written out as formatted Ruby below.
# Hunk context that is cut off by a hunk boundary is kept verbatim in comments.
#
# @@ -1,6 +1,7 @@   (truncated: load_hpo_ontology continues outside this excerpt)
#  require 'csv'
# +require 'bio-vcf'
#  def load_hpo_ontology(hpo_file, excluded_hpo_file)
#    hpo = nil
#    if !hpo_file.include?('.json')
#      if !excluded_hpo_file.nil?
#
# @@ -199,24 +200,43 @@   (leading context: tail of a definition that starts outside this excerpt)
#    return profiles
#  end
#
# Lines removed by this hunk (old side of load_variants):
# -  Dir.glob(File.join(variant_folder, '*.tab')).each do |path|
# -    profile_id = File.basename(path, '.tab')
# -    vars = []
# -    File.open(path).each do |line|
# -      fields = line.chomp.split("\t")
# -      chr = fields[0]
# -      start = fields[1].to_i
# -      vars << [chr, start, start]

# Loads the variants of every profile stored in +variant_folder+.
#
# Each file matching *.tab, *.vcf or *.vcf.gz is one profile. The profile id
# is the file name with that extension removed, so ids that contain dots
# (e.g. "fam.2.vcf" -> "fam.2") are kept whole.
#
# NOTE(review): the previous code also tested for a 'txt' extension, but the
# glob never yields .txt files, so that branch could not run. Add 'txt' to the
# glob and to the tabular case below if .txt inputs are meant to be supported.
#
# @param variant_folder [String] directory to scan (not recursive)
# @return [Hash] profile id => Genomic_Feature built from that file's variants
def load_variants(variant_folder)
  variants = {}
  Dir.glob(File.join(variant_folder, '*.{tab,vcf,vcf.gz}')).each do |path|
    file_name = File.basename(path)
    ext = %w[vcf.gz vcf tab].find { |suffix| file_name.end_with?(".#{suffix}") }
    if ext.nil?
      # Only reachable if the glob matched with a different letter case.
      STDERR.puts "Warning: #{path} has an unsupported extension and was skipped"
      next
    end
    profile_id = File.basename(path, ".#{ext}")
    vars = ext == 'tab' ? load_tabular_vars(path) : load_vcf(path, ext)
    variants[profile_id] = Genomic_Feature.new(vars)
  end
  return variants
end

# Reads variants from a tab-separated file: column 1 is the chromosome and
# column 2 the position. Further columns are ignored.
#
# A leading "chr" is stripped from the chromosome name. Blank lines are
# skipped instead of raising. The file is closed once reading finishes.
#
# @param path [String] path to the tab-separated file
# @return [Array<Array>] one [chromosome, position, position] triple per line
def load_tabular_vars(path)
  vars = []
  File.foreach(path) do |line|
    next if line.strip.empty?
    fields = line.chomp.split("\t")
    chr = fields[0].sub(/\Achr/, '')
    start = fields[1].to_i
    vars << [chr, start, start]
  end
  return vars
end

# Reads variants from a VCF file through BioVcf::VCFfile.
#
# Some compressed files are fragmented internally. If so, VCFfile only reads
# the first fragment. Use `zcat original.vcf.gz | gzip > new.vcf.gz` to obtain
# a contiguous file.
#
# @param path [String] path to the VCF file
# @param ext [String] extension of the file; 'vcf.gz' selects gzip reading
# @return [Array<Array>] one [chromosome, position, position] triple per record,
#   with a leading "chr" stripped from the chromosome name
def load_vcf(path, ext)
  vars = []
  vcf = BioVcf::VCFfile.new(file: path, is_gz: ext == 'vcf.gz')
  vcf.each do |var|
    vars << [var.chrom.sub(/\Achr/, ''), var.pos, var.pos]
  end
  return vars
end

# Trailing context of the hunk above (truncated: load_evidences continues
# outside this excerpt):
#  def load_evidences(evidences_path, hpo)
#    genomic_coordinates = {}
#    coord_files = Dir.glob(File.join(evidences_path, '*.coords'))
#    coord_files.each do |cd_f|
#      entity = File.basename(cd_f, '.coords')
#
# @@ -240,9 +260,13 @@   (interior of a definition whose header is outside this excerpt)
#      fields = line.chomp.split("\t")
#      if header
#        header = false
#      else
#        entity, chr, strand, start, stop = fields
# +      if chr == 'NA'
# +        STDERR.puts "Warning: Record #{fields.inspect} is undefined"
# +        next
# +      end
#        coordinates[entity] = [chr, start.to_i, stop.to_i, strand]
#      end
#    end
#    return coordinates
#  end