lib/cheripic/vcf.rb in cheripic-1.2.5 vs lib/cheripic/vcf.rb in cheripic-1.2.6

- old
+ new

@@ -7,37 +7,42 @@ require 'bio-samtools' class Vcf - def self.get_allele_freq(vcf_obj) + def self.get_allele_depth(vcf_obj) # check if the vcf is from samtools (has DP4 and AF1 fields in INFO) if vcf_obj.info.key?('DP4') freq = vcf_obj.info['DP4'].split(',') - depth = freq.inject { | sum, n | sum.to_f + n.to_f } + ref = freq[0].to_f + freq[1].to_f alt = freq[2].to_f + freq[3].to_f - allele_freq = alt / depth - # allele_freq = vcf_obj.non_ref_allele_freq # check if the vcf is from VarScan (has RD, AD and FREQ fields in FORMAT) elsif vcf_obj.samples['1'].key?('RD') + ref = vcf_obj.samples['1']['RD'].to_f alt = vcf_obj.samples['1']['AD'].to_f - depth = vcf_obj.samples['1']['RD'].to_f + alt - allele_freq = alt / depth # check if the vcf is from GATK (has AD and GT fields in FORMAT) elsif vcf_obj.samples['1'].key?('AD') and vcf_obj.samples['1']['AD'].include?(',') freq = vcf_obj.samples['1']['AD'].split(',') - allele_freq = freq[1].to_f / ( freq[0].to_f + freq[1].to_f ) + ref = freq[0].to_i + alt = freq[1].to_i # check if the vcf has has AF fields in INFO elsif vcf_obj.info.key?('AF') allele_freq = vcf_obj.info['AF'].to_f + depth = vcf_obj.info['DP'].to_i + alt = (depth * allele_freq).round + ref = depth - alt else raise VcfError.new 'not a supported vcf format (VarScan, GATK, Bcftools(Samtools), Vcf 4.0, 4.1 and 4.2)' + " and check that it is one sample vcf\n" end - allele_freq + [ref, alt] end + def self.get_allele_freq(vcf_obj) + ref, alt = get_allele_depth(vcf_obj) + alt.to_f/(ref + alt) + end ##Input: vcf file ##Ouput: lists of hm and ht SNPS and hash of all fragments with variants def self.get_vars(vcf_file) ht_low = Options.htlow @@ -74,9 +79,28 @@ var_pos_mut[frag][:hom].delete(pos) end end end var_pos_mut + end + + def self.to_pileup(v) + ref, alt = Vcf.get_allele_depth(v) + depth = ref + alt + alt_bases = '' + v.alt + ref_len = v.ref.length + alt_len = v.alt.length + if ref_len > alt_len + seq = v.ref[alt_len..-1] + alt_bases = '-' + seq.length.to_s + seq + v.ref = v.ref[0] + elsif ref_len < alt_len + seq = v.alt[ref_len..-1] + alt_bases = '+' + seq.length.to_s + seq + end + bases = ('.' * ref) + ( alt_bases * alt) + quality = 'D' * depth + [v.chrom, v.pos, v.ref, depth, bases, quality].join("\t") end end end \ No newline at end of file