lib/bio/db/gff/gffassemble.rb in bio-gff3-0.6.0 vs lib/bio/db/gff/gffassemble.rb in bio-gff3-0.8.0
- old
+ new
@@ -196,93 +196,93 @@
# Patch a sequence together from a Sequence string and an array
# of records. Note that rec positions are 1-based coordinates, relative
# to the landmark given in column 1 - in this case the sequence as it
# is passed in. The following options are available:
#
- # :phase : set phase (default true)
- # :reverse : do reverse if reverse is indicated (true)
- # :complement : do complement if reverse is indicated (true)
- # :trim : make sure sequence is multiple of 3 nucleotide bps (false)
+ # :reverse : do reverse if reverse is indicated (default true)
+ # :complement : do complement if reverse is indicated (default true)
+ # :phase : do set CDS phase (default false, normally ignore)
+ # :trim : make sure sequence is multiple of 3 nucleotide bps (default false)
#
# there are two special options:
#
# :raw : raw sequence (all above false)
- # :codonize : codon sequence (all above true)
+ # :codonize : codon sequence (reverse, complement and trim are true)
#
- def assemble sequence, startpos, reclist, options = { :phase=>true, :reverse=>true, :trim=>false, :complement=>false }
+ def assemble sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true, :debug=>false }
+ do_debug = options[:debug]
do_phase = options[:phase]
- do_reverse = options[:reverse]
- do_trim = options[:trim]
- do_complement = options[:complement]
+ do_reverse = (options[:reverse] == false ? false : true)
+ do_trim = (options[:trim] == false ? false : true)
+ do_complement = (options[:complement] == false ? false : true)
if options[:raw]
do_phase = false
do_reverse = false
do_trim = false
do_complement = false
elsif options[:codonize]
- do_phase = true
+ do_phase = false
do_reverse = true
do_trim = true
do_complement = true
end
- retval = ""
sectionlist = Sections::sort(reclist)
- reverse = false
- # we assume strand is always the same
rec0 = sectionlist.first.rec
- reverse = (rec0.strand == '-') if rec0.strand
- if reverse
- # fetch phase from the last feature when reversed
- rec0 = sectionlist.last.rec
+ # we assume ORF is always read in the same direction
+ orf_reverse = (rec0.strand == '-')
+ orf_frame = startpos - 1
+ orf_frameshift = orf_frame % 3
+ sectionlist = sectionlist.reverse if orf_reverse
+ if do_debug
+ p "------------------"
+ p options
+ p [:reverse,do_reverse]
+ p [:complement,do_complement]
+ p [:trim,do_trim]
+ p [:orf_reverse, orf_reverse, rec0.strand]
end
- frame = 0
- frame = rec0.frame if rec0.frame
- sectionlist.each do | section |
- if sequence.kind_of?(Bio::FastaFormat)
- sequence = sequence.seq
- end
- rec = section.rec
- seq = sequence[(rec.start-1)..(rec.end-1)]
- retval += seq
+
+ if sequence.kind_of?(Bio::FastaFormat)
+ # BioRuby conversion
+ sequence = sequence.seq
end
- seq = retval
- if do_reverse
- # if strand is negative, reverse
- seq = seq.reverse if reverse
- end
- if do_phase
- # For forward strand features, phase is counted from the start
- # field. For reverse strand features, phase is counted from the end
- # field.
- #
- # With a reverse protein coding string in Wormbase
- # the phase appears to be disregarded - or rather handled
- # by start-stop. This is a hack.
- if do_reverse and reverse and (seq.size % 3 == 0)
- # do nothing
- else
- seq = seq[frame..-1] if frame != 0 # set phase
+ # Generate array of sequences
+ seq = sectionlist.map { | section |
+ rec = section.rec
+ s = sequence[(section.begin-1)..(section.end-1)]
+ if do_reverse and orf_reverse
+ s = s.reverse
end
- end
- if do_complement
- # if strand is negative, forward complement
- if reverse
- ntseq = Bio::Sequence::NA.new(seq)
- seq = ntseq.forward_complement.upcase
+ # Correct for phase. Unfortunately the use of phase is ambiguous.
+ # Here we check whether rec.start is in line with orf_frame. If it
+ # is, we correct for phase. Otherwise it is ignored.
+ if do_phase and rec.phase
+ phase = rec.phase.to_i
+ # if ((rec.start-startpos) % 3 == 0)
+ s = s[phase..-1]
+ # end
end
+ s
+ }
+ # p seq
+ seq = seq.join
+ if do_complement and do_reverse and orf_reverse
+ ntseq = Bio::Sequence::NA.new(seq)
+ seq = ntseq.forward_complement.upcase
end
if do_trim
reduce = seq.size % 3
seq = seq[0..(seq.size-1-reduce)] if reduce != 0
end
retval = seq
retval
end
# Patch a sequence together from a Sequence string and an array
- # of records and translate in the correct direction and frame
- def assembleAA sequence, startpos, rec
- seq = assemble(sequence, startpos, rec, :phase=>true, :reverse=>true, :complement=>true)
+ # of records and translate in the correct direction and frame. The options
+ # are the same as for +assemble+.
+ def assembleAA sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true }
+ seq = assemble(sequence, startpos, reclist, options)
ntseq = Bio::Sequence::NA.new(seq)
ntseq.translate
end
# Create a description for output