lib/bio/db/gff/gffassemble.rb in bio-gff3-0.8.2 vs lib/bio/db/gff/gffassemble.rb in bio-gff3-0.8.3

- old
+ new

@@ -204,20 +204,24 @@ # is passed in. The following options are available: # # :reverse : do reverse if reverse is indicated (default true) # :complement : do complement if reverse is indicated (default true) # :phase : do set CDS phase (default false, normally ignore) - # :trim : make sure sequence is multiple of 3 nucleotide bps (default false) + # :trim : make sure sequence is multiple of 3 nucleotide bps (default true) # - # there are two special options: + # special options: # # :raw : raw sequence (all above false) - # :codonize : codon sequence (reverse, complement and trim are true) + # :codonize : codon sequence (reverse, complement, and trim are true) + # :fix : fix errors (default false) # - def assemble sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true, :debug=>false } + def assemble sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>true, :complement=>true, :fix=>false, :debug=>false } + # default to nil, if not passed in do_debug = options[:debug] do_phase = options[:phase] + do_fix = options[:fix] + # default to true, if not passed in do_reverse = (options[:reverse] == false ? false : true) do_trim = (options[:trim] == false ? false : true) do_complement = (options[:complement] == false ? false : true) if options[:raw] do_phase = false @@ -272,21 +276,53 @@ seq = seq.join if do_complement and do_reverse and orf_reverse ntseq = Bio::Sequence::NA.new(seq) seq = ntseq.forward_complement.upcase end + # This is the place to fix sequences (e.g. the Wormbase bug) + if do_fix or @options[:fix] or @options[:fix_wormbase] + if @options[:fix_wormbase] and rec0.id.index('gene1')==0 + # Wormbase gene1 only, so ignore rest + else + test_frame = 0 + ntseq = Bio::Sequence::NA.new(seq) + aaseq = ntseq.translate + if aaseq.count('*') > 1 + test_frame = 1 + seq = seq[1..-1] + ntseq = Bio::Sequence::NA.new(seq) + aaseq = ntseq.translate + if aaseq.count('*') > 1 + test_frame = 2 + seq = seq[1..-1] + ntseq = Bio::Sequence::NA.new(seq) + aaseq = ntseq.translate + raise 'Validation problem '+rec0.id if aaseq.count('*') > 1 + end + end + if test_frame > 0 + warn rec0.id,"Frame adjusted to #{test_frame} (fix)" + end + end + end if do_trim reduce = seq.size % 3 seq = seq[0..(seq.size-1-reduce)] if reduce != 0 end + if @options[:validate] + ntseq = Bio::Sequence::NA.new(seq) + aaseq = ntseq.translate + raise 'Validate translation problem '+rec0.id+"\n"+seq if aaseq.count('*') > 1 + end + retval = seq retval end # Patch a sequence together from a Sequence string and an array # of records and translate in the correct direction and frame. The options - # are the same as for +assemble+. - def assembleAA sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true } + # are the same as for +assemble+, except :trim defaults to true. + def assembleAA sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>true, :complement=>true } seq = assemble(sequence, startpos, reclist, options) ntseq = Bio::Sequence::NA.new(seq) ntseq.translate end