lib/cheripic/vcf.rb in cheripic-1.2.5 vs lib/cheripic/vcf.rb in cheripic-1.2.6
- old
+ new
@@ -7,37 +7,42 @@
require 'bio-samtools'
class Vcf
- def self.get_allele_freq(vcf_obj)
+ def self.get_allele_depth(vcf_obj)
# check if the vcf is from samtools (has DP4 and AF1 fields in INFO)
if vcf_obj.info.key?('DP4')
freq = vcf_obj.info['DP4'].split(',')
- depth = freq.inject { | sum, n | sum.to_f + n.to_f }
+ ref = freq[0].to_f + freq[1].to_f
alt = freq[2].to_f + freq[3].to_f
- allele_freq = alt / depth
- # allele_freq = vcf_obj.non_ref_allele_freq
# check if the vcf is from VarScan (has RD, AD and FREQ fields in FORMAT)
elsif vcf_obj.samples['1'].key?('RD')
+ ref = vcf_obj.samples['1']['RD'].to_f
alt = vcf_obj.samples['1']['AD'].to_f
- depth = vcf_obj.samples['1']['RD'].to_f + alt
- allele_freq = alt / depth
# check if the vcf is from GATK (has AD and GT fields in FORMAT)
elsif vcf_obj.samples['1'].key?('AD') and vcf_obj.samples['1']['AD'].include?(',')
freq = vcf_obj.samples['1']['AD'].split(',')
- allele_freq = freq[1].to_f / ( freq[0].to_f + freq[1].to_f )
+ ref = freq[0].to_i
+ alt = freq[1].to_i
# check if the vcf has has AF fields in INFO
elsif vcf_obj.info.key?('AF')
allele_freq = vcf_obj.info['AF'].to_f
+ depth = vcf_obj.info['DP'].to_i
+ alt = (depth * allele_freq).round
+ ref = depth - alt
else
raise VcfError.new 'not a supported vcf format (VarScan, GATK, Bcftools(Samtools), Vcf 4.0, 4.1 and 4.2)' +
" and check that it is one sample vcf\n"
end
- allele_freq
+ [ref, alt]
end
+ def self.get_allele_freq(vcf_obj)
+ ref, alt = get_allele_depth(vcf_obj)
+ alt.to_f/(ref + alt)
+ end
##Input: vcf file
##Ouput: lists of hm and ht SNPS and hash of all fragments with variants
def self.get_vars(vcf_file)
ht_low = Options.htlow
@@ -74,9 +79,28 @@
var_pos_mut[frag][:hom].delete(pos)
end
end
end
var_pos_mut
+ end
+
+ def self.to_pileup(v)
+ ref, alt = Vcf.get_allele_depth(v)
+ depth = ref + alt
+ alt_bases = '' + v.alt
+ ref_len = v.ref.length
+ alt_len = v.alt.length
+ if ref_len > alt_len
+ seq = v.ref[alt_len..-1]
+ alt_bases = '-' + seq.length.to_s + seq
+ v.ref = v.ref[0]
+ elsif ref_len < alt_len
+ seq = v.alt[ref_len..-1]
+ alt_bases = '+' + seq.length.to_s + seq
+ end
+ bases = ('.' * ref) + ( alt_bases * alt)
+ quality = 'D' * depth
+ [v.chrom, v.pos, v.ref, depth, bases, quality].join("\t")
end
end
end
\ No newline at end of file