lib/scaffolder/annotation_locator.rb in scaffolder-annotation-locator-0.0.1 vs lib/scaffolder/annotation_locator.rb in scaffolder-annotation-locator-0.1.0
- old
+ new
@@ -1,48 +1,55 @@
require 'delegate'
require 'scaffolder'
require 'bio'
+require 'scaffolder/extensions'
+
class Scaffolder::AnnotationLocator < DelegateClass(Array)
def initialize(scaffold_file,sequence_file,gff_file)
@scaffold_file = scaffold_file
@sequence_file = sequence_file
@gff_file = gff_file
updated_records = Array.new
- scaffold.inject(0) do |length,entry|
+ scaffold.inject(0) do |prior_length,entry|
if entry.entry_type == :sequence
- updated_records << records[entry.source].map do |record|
- update_record(record,entry,length)
- end
- end
+ records[entry.source].each do |record|
- length + entry.sequence.length
- end
+ # Don't include this record if it overlaps with an insert
+ next if record.overlap?(entry.inserts.map{|i| (i.open..i.close)})
- super updated_records.flatten
- end
+ # Skip this record if it lies in the start or stop trimmed regions
+ next if record.start < entry.start
+ next if record.end > entry.stop
- def update_record(record,scaffold_entry,prior_length)
- record.start -= scaffold_entry.start - 1
- record.end -= scaffold_entry.start - 1
+ # Update record location by size differences of prior inserts
+ entry.inserts.select {|i| i.close < record.start }.each do |insert|
+ record.change_position_by insert.size_diff
+ end
- if scaffold_entry.reverse
- record.end = scaffold_entry.sequence.length - (record.end - 1)
- record.start = scaffold_entry.sequence.length - (record.start - 1)
+ # Decrease record position by distance contig is trimmed at start
+ record.change_position_by(1 - entry.start)
- record.end, record.start = record.start, record.end
- record.strand = self.class.flip_strand(record.strand)
- end
+ # Reverse complement record positions if contig is reversed
+ record.reverse_complement_by entry.sequence.length if entry.reverse
- record.start += prior_length
- record.end += prior_length
+ # Increase record position by length of prior contigs
+ record.change_position_by prior_length
- record.seqname = "scaffold"
- record
+ record.seqname = "scaffold"
+
+ updated_records << record
+ end
+ end
+
+ prior_length + entry.sequence.length
+ end
+
+ super updated_records
end
def scaffold
Scaffolder.new(YAML.load(File.read(@scaffold_file)),@sequence_file)
end
@@ -51,12 +58,8 @@
gff3 = Bio::GFF::GFF3.new(File.read(@gff_file)).records
gff3.inject(Hash.new{|h,k| h[k] = Array.new }) do |hash,record|
hash[record.seqname] << record
hash
end
- end
-
- def self.flip_strand(strand)
- strand == '+' ? '-' : '+'
end
end