lib/pets/io.rb in pets-0.2.4 vs lib/pets/io.rb in pets-0.2.5
- old
+ new
@@ -1,6 +1,7 @@
require 'csv'
+require 'bio-vcf'
def load_hpo_ontology(hpo_file, excluded_hpo_file)
hpo = nil
if !hpo_file.include?('.json')
if !excluded_hpo_file.nil?
@@ -199,24 +200,43 @@
return profiles
end
# Loads the variant files found in +variant_folder+, one file per profile.
#
# This span is a diff hunk: lines marked "-" are the pets-0.2.4 body, which
# globbed '*.tab' files and parsed them inline; lines marked "+" are the
# pets-0.2.5 body, which dispatches on the file extension to
# load_tabular_vars or load_vcf.
#
# variant_folder - directory that is globbed for variant files.
#
# Returns a Hash mapping each profile id to Genomic_Feature.new(vars). In the
# "+" version the profile id is the file basename up to its first ".".
def load_variants(variant_folder)
variants = {}
- Dir.glob(File.join(variant_folder, '*.tab')).each do |path|
- profile_id = File.basename(path, '.tab')
- vars = []
- File.open(path).each do |line|
- fields = line.chomp.split("\t")
- chr = fields[0]
- start = fields[1].to_i
- vars << [chr, start, start]
# NOTE(review): the glob below only matches .tab, .vcf and .vcf.gz, so the
# 'txt' comparison further down cannot succeed for any globbed path.
+ Dir.glob(File.join(variant_folder, '*.{tab,vcf,vcf.gz}')).each do |path|
# split(".", 2) cuts at the FIRST dot: "a.vcf.gz" gives ext "vcf.gz", but a
# basename with extra dots such as "a.b.tab" gives ext "b.tab".
# NOTE(review): when ext matches neither branch, vars is nil at the
# Genomic_Feature.new call - confirm whether that is intended.
+ profile_id, ext = File.basename(path).split(".", 2)
+ if ext == 'tab' || ext == 'txt'
+ vars = load_tabular_vars(path)
+ elsif ext == 'vcf' || ext == 'vcf.gz'
+ vars = load_vcf(path, ext)
end
variants[profile_id] = Genomic_Feature.new(vars)
end
return variants
end
# Reads a tab-separated variant file into a list of single-position intervals.
#
# Each line is split on tabs: the first field is the chromosome name and the
# second field the position (converted with to_i). A leading "chr" prefix is
# removed from the chromosome name; "chr" appearing later in the name is kept.
# Blank lines are skipped.
#
# path - path of the tabular file to read.
#
# Returns an Array of [chr, start, start] triples, one per non-blank line, in
# file order (empty Array for an empty file).
def load_tabular_vars(path)
  vars = []
  # File.foreach closes the file once iteration finishes, unlike a bare
  # File.open(path).each, which leaves the handle open until garbage collected.
  File.foreach(path) do |line|
    fields = line.chomp.split("\t")
    next if fields.empty? # blank line: there is no chromosome field to read
    chr = fields[0].sub(/\Achr/, '') # anchored: strip only the leading prefix
    start = fields[1].to_i
    vars << [chr, start, start]
  end
  return vars
end
+
# Reads the variant positions of a VCF file through BioVcf::VCFfile.
#
# Some compressed files are fragmented internally. If so, VCFfile only reads
# the first fragment. Use `zcat original.vcf.gz | gzip > new.vcf.gz` to obtain
# a contiguous file.
#
# path - path of the VCF file.
# ext  - extension of the file; 'vcf.gz' makes VCFfile read it as gzip,
#        any other value reads it as plain text.
#
# Returns an Array of [chrom, pos, pos] triples in file order, where chrom has
# a leading "chr" prefix removed. Nothing is written to standard output.
def load_vcf(path, ext)
  vcf = BioVcf::VCFfile.new(file: path, is_gz: ext == 'vcf.gz')
  vars = []
  vcf.each do |var|
    # Anchored sub: only a leading "chr" is a prefix; keep any later "chr".
    vars << [var.chrom.sub(/\Achr/, ''), var.pos, var.pos]
  end
  return vars
end
+
def load_evidences(evidences_path, hpo)
genomic_coordinates = {}
coord_files = Dir.glob(File.join(evidences_path, '*.coords'))
coord_files.each do |cd_f|
entity = File.basename(cd_f, '.coords')
@@ -240,9 +260,13 @@
fields = line.chomp.split("\t")
if header
header = false
else
entity, chr, strand, start, stop = fields
+ if chr == 'NA'
+ STDERR.puts "Warning: Record #{fields.inspect} is undefined"
+ next
+ end
coordinates[entity] = [chr, start.to_i, stop.to_i, strand]
end
end
return coordinates
end