lib/scaffolder/annotation_locator.rb in scaffolder-annotation-locator-0.0.1 vs lib/scaffolder/annotation_locator.rb in scaffolder-annotation-locator-0.1.0

- old
+ new

# Unified-diff hunks for Scaffolder::AnnotationLocator: "-" marks a 0.0.1 line, "+" a 0.1.0 line.
# NOTE(review): line breaks inside the hunks appear to have been lost, so old and new statements
# are interleaved on the long lines below; read them marker by marker, not as runnable Ruby.
# Hunk 1 (@@ -1,48 +1,55 @@): adds `require 'scaffolder/extensions'` and rewrites #initialize so
# that each record is skipped or repositioned inline and appended to updated_records; the separate
# #update_record method is removed and `super` is now passed updated_records without `.flatten`.
# Hunk 2 (@@ -51,12 +58,8 @@): removes the `self.flip_strand` class method.
@@ -1,48 +1,55 @@ require 'delegate' require 'scaffolder' require 'bio' +require 'scaffolder/extensions' + class Scaffolder::AnnotationLocator < DelegateClass(Array) def initialize(scaffold_file,sequence_file,gff_file) @scaffold_file = scaffold_file @sequence_file = sequence_file @gff_file = gff_file updated_records = Array.new - scaffold.inject(0) do |length,entry| + scaffold.inject(0) do |prior_length,entry| if entry.entry_type == :sequence - updated_records << records[entry.source].map do |record| - update_record(record,entry,length) - end - end + records[entry.source].each do |record| - length + entry.sequence.length - end + # Don't include this record if it overlaps with an insert + next if record.overlap?(entry.inserts.map{|i| (i.open..i.close)}) - super updated_records.flatten - end + # Skip this record if it lies in the start or stop trimmed regions + next if record.start < entry.start + next if record.end > entry.stop - def update_record(record,scaffold_entry,prior_length) - record.start -= scaffold_entry.start - 1 - record.end -= scaffold_entry.start - 1 + # Update record location by size differences of prior inserts + entry.inserts.select {|i| i.close < record.start }.each do |insert| + record.change_position_by insert.size_diff + end - if scaffold_entry.reverse - record.end = scaffold_entry.sequence.length - (record.end - 1) - record.start = scaffold_entry.sequence.length - (record.start - 1) + # Decrease record position by distance contig is trimmed at start + record.change_position_by(1 - entry.start) - record.end, record.start = record.start, record.end - record.strand = self.class.flip_strand(record.strand) - end + # Reverse complement record positions if contig is reversed + record.reverse_complement_by entry.sequence.length if entry.reverse - record.start += prior_length - record.end += prior_length + # Increase record position by length of prior contigs + record.change_position_by prior_length - record.seqname = "scaffold" - record + record.seqname 
= "scaffold" + + updated_records << record + end + end + + prior_length + entry.sequence.length + end + + super updated_records end def scaffold Scaffolder.new(YAML.load(File.read(@scaffold_file)),@sequence_file) end @@ -51,12 +58,8 @@ gff3 = Bio::GFF::GFF3.new(File.read(@gff_file)).records gff3.inject(Hash.new{|h,k| h[k] = Array.new }) do |hash,record| hash[record.seqname] << record hash end - end - - def self.flip_strand(strand) - strand == '+' ? '-' : '+' end end