lib/bio/db/gff/gffassemble.rb in bio-gff3-0.6.0 vs lib/bio/db/gff/gffassemble.rb in bio-gff3-0.8.0

- old
+ new

@@ -196,93 +196,93 @@ # Patch a sequence together from a Sequence string and an array # of records. Note that rec positions are 1-based coordinates, relative # to the landmark given in column 1 - in this case the sequence as it # is passed in. The following options are available: # - # :phase : set phase (default true) - # :reverse : do reverse if reverse is indicated (true) - # :complement : do complement if reverse is indicated (true) - # :trim : make sure sequence is multiple of 3 nucleotide bps (false) + # :reverse : do reverse if reverse is indicated (default true) + # :complement : do complement if reverse is indicated (default true) + # :phase : do set CDS phase (default false, normally ignore) + # :trim : make sure sequence is multiple of 3 nucleotide bps (default false) # # there are two special options: # # :raw : raw sequence (all above false) - # :codonize : codon sequence (all above true) + # :codonize : codon sequence (reverse, complement and trim are true) # - def assemble sequence, startpos, reclist, options = { :phase=>true, :reverse=>true, :trim=>false, :complement=>false } + def assemble sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true, :debug=>false } + do_debug = options[:debug] do_phase = options[:phase] - do_reverse = options[:reverse] - do_trim = options[:trim] - do_complement = options[:complement] + do_reverse = (options[:reverse] == false ? false : true) + do_trim = (options[:trim] == false ? false : true) + do_complement = (options[:complement] == false ? false : true) if options[:raw] do_phase = false do_reverse = false do_trim = false do_complement = false elsif options[:codonize] - do_phase = true + do_phase = false do_reverse = true do_trim = true do_complement = true end - retval = "" sectionlist = Sections::sort(reclist) - reverse = false - # we assume strand is always the same rec0 = sectionlist.first.rec - reverse = (rec0.strand == '-') if rec0.strand - if reverse - # fetch phase from the last feature when reversed - rec0 = sectionlist.last.rec + # we assume ORF is always read in the same direction + orf_reverse = (rec0.strand == '-') + orf_frame = startpos - 1 + orf_frameshift = orf_frame % 3 + sectionlist = sectionlist.reverse if orf_reverse + if do_debug + p "------------------" + p options + p [:reverse,do_reverse] + p [:complement,do_complement] + p [:trim,do_trim] + p [:orf_reverse, orf_reverse, rec0.strand] end - frame = 0 - frame = rec0.frame if rec0.frame - sectionlist.each do | section | - if sequence.kind_of?(Bio::FastaFormat) - sequence = sequence.seq - end - rec = section.rec - seq = sequence[(rec.start-1)..(rec.end-1)] - retval += seq + + if sequence.kind_of?(Bio::FastaFormat) + # BioRuby conversion + sequence = sequence.seq end - seq = retval - if do_reverse - # if strand is negative, reverse - seq = seq.reverse if reverse - end - if do_phase - # For forward strand features, phase is counted from the start - # field. For reverse strand features, phase is counted from the end - # field. - # - # With a reverse protein coding string in Wormbase - # the phase appears to be disregarded - or rather handled - # by start-stop. This is a hack. - if do_reverse and reverse and (seq.size % 3 == 0) - # do nothing - else - seq = seq[frame..-1] if frame != 0 # set phase + # Generate array of sequences + seq = sectionlist.map { | section | + rec = section.rec + s = sequence[(section.begin-1)..(section.end-1)] + if do_reverse and orf_reverse + s = s.reverse end - end - if do_complement - # if strand is negative, forward complement - if reverse - ntseq = Bio::Sequence::NA.new(seq) - seq = ntseq.forward_complement.upcase + # Correct for phase. Unfortunately the use of phase is ambiguous. + # Here we check whether rec.start is in line with orf_frame. If it + # is, we correct for phase. Otherwise it is ignored. + if do_phase and rec.phase + phase = rec.phase.to_i + # if ((rec.start-startpos) % 3 == 0) + s = s[phase..-1] + # end end + s + } + # p seq + seq = seq.join + if do_complement and do_reverse and orf_reverse + ntseq = Bio::Sequence::NA.new(seq) + seq = ntseq.forward_complement.upcase end if do_trim reduce = seq.size % 3 seq = seq[0..(seq.size-1-reduce)] if reduce != 0 end retval = seq retval end # Patch a sequence together from a Sequence string and an array - # of records and translate in the correct direction and frame - def assembleAA sequence, startpos, rec - seq = assemble(sequence, startpos, rec, :phase=>true, :reverse=>true, :complement=>true) + # of records and translate in the correct direction and frame. The options + # are the same as for +assemble+. + def assembleAA sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true } + seq = assemble(sequence, startpos, reclist, options) ntseq = Bio::Sequence::NA.new(seq) ntseq.translate end # Create a description for output