lib/bio/db/gff/gffassemble.rb in bio-gff3-0.8.2 vs lib/bio/db/gff/gffassemble.rb in bio-gff3-0.8.3
- old
+ new
@@ -204,20 +204,24 @@
# is passed in. The following options are available:
#
# :reverse : do reverse if reverse is indicated (default true)
# :complement : do complement if reverse is indicated (default true)
# :phase : do set CDS phase (default false, normally ignore)
- # :trim : make sure sequence is multiple of 3 nucleotide bps (default false)
+ # :trim : make sure sequence is multiple of 3 nucleotide bps (default true)
#
- # there are two special options:
+ # special options:
#
# :raw : raw sequence (all above false)
- # :codonize : codon sequence (reverse, complement and trim are true)
+ # :codonize : codon sequence (reverse, complement, and trim are true)
+ # :fix : fix errors (default false)
#
- def assemble sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true, :debug=>false }
+ def assemble sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>true, :complement=>true, :fix=>false, :debug=>false }
+ # default to nil, if not passed in
do_debug = options[:debug]
do_phase = options[:phase]
+ do_fix = options[:fix]
+ # default to true, if not passed in
do_reverse = (options[:reverse] == false ? false : true)
do_trim = (options[:trim] == false ? false : true)
do_complement = (options[:complement] == false ? false : true)
if options[:raw]
do_phase = false
@@ -272,21 +276,53 @@
seq = seq.join
if do_complement and do_reverse and orf_reverse
ntseq = Bio::Sequence::NA.new(seq)
seq = ntseq.forward_complement.upcase
end
+ # This is the place to fix sequences (e.g. the Wormbase bug)
+ if do_fix or @options[:fix] or @options[:fix_wormbase]
+ if @options[:fix_wormbase] and rec0.id.index('gene1')==0
+ # Wormbase gene1 only, so ignore rest
+ else
+ test_frame = 0
+ ntseq = Bio::Sequence::NA.new(seq)
+ aaseq = ntseq.translate
+ if aaseq.count('*') > 1
+ test_frame = 1
+ seq = seq[1..-1]
+ ntseq = Bio::Sequence::NA.new(seq)
+ aaseq = ntseq.translate
+ if aaseq.count('*') > 1
+ test_frame = 2
+ seq = seq[1..-1]
+ ntseq = Bio::Sequence::NA.new(seq)
+ aaseq = ntseq.translate
+ raise 'Validation problem '+rec0.id if aaseq.count('*') > 1
+ end
+ end
+ if test_frame > 0
+ warn rec0.id,"Frame adjusted to #{test_frame} (fix)"
+ end
+ end
+ end
if do_trim
reduce = seq.size % 3
seq = seq[0..(seq.size-1-reduce)] if reduce != 0
end
+ if @options[:validate]
+ ntseq = Bio::Sequence::NA.new(seq)
+ aaseq = ntseq.translate
+ raise 'Validate translation problem '+rec0.id+"\n"+seq if aaseq.count('*') > 1
+ end
+
retval = seq
retval
end
# Patch a sequence together from a Sequence string and an array
# of records and translate in the correct direction and frame. The options
- # are the same as for +assemble+.
- def assembleAA sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true }
+ # are the same as for +assemble+, except :trim defaults to true.
+ def assembleAA sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>true, :complement=>true }
seq = assemble(sequence, startpos, reclist, options)
ntseq = Bio::Sequence::NA.new(seq)
ntseq.translate
end