lib/bio/db/gff/gffparser.rb in bio-gff3-0.8.3 vs lib/bio/db/gff/gffparser.rb in bio-gff3-0.8.4

- old
+ new

@@ -21,28 +21,30 @@ return if rec.comment # skip GFF comments id = Record::formatID(rec) @count_ids.add(id) @count_seqnames.add(rec.seqname) - if COMPONENT_TYPES.include?(rec.feature_type) + is_component = COMPONENT_TYPES.include?(rec.feature_type) + if is_component # check for container ID warn("Container <#{rec.feature_type}> has no ID, so using sequence name instead",id) if rec.id == nil @componentlist[id] = rec info "Added #{rec.feature_type} with component ID #{id}" - else - case rec.feature_type - when 'mRNA' || 'SO:0000234' - @mrnalist.add(id,rec) - when 'CDS' || 'SO:0000316' - @cdslist.add(id,rec) - when 'exon' || 'SO:0000147' - @exonlist.add(id,rec) - else - if !IGNORE_FEATURES.include?(rec.feature_type) - @unrecognized_features[rec.feature_type] = true - end - end + end + case rec.feature_type + when 'gene' || 'SO:0000704' + @orflist.add(id,rec) + when 'mRNA' || 'SO:0000234' + @mrnalist.add(id,rec) + when 'CDS' || 'SO:0000316' + @cdslist.add(id,rec) + when 'exon' || 'SO:0000147' + @exonlist.add(id,rec) + else + if !is_component and !IGNORE_FEATURES.include?(rec.feature_type) + @unrecognized_features[rec.feature_type] = true + end end end def validate_mrnas return if not @options[:validate] @@ -78,10 +80,16 @@ end end # p :inmemory, @sequencelist end + # Yield the id, recs, containing component and sequence of genes + def each_gene + parse if !@orflist + each_item(@orflist) { |id, recs, component | yield id, recs, component } + end + # Yield the id, recs, containing component and sequence of mRNAs def each_mRNA parse if !@mrnalist each_item(@mrnalist) { |id, recs, component | yield id, recs, component } end @@ -94,9 +102,24 @@ # Yield the id, recs, and containing component def each_exon parse if !@exonlist each_item(@exonlist) { |id, recs, component | yield id, recs, component } + end + + # Yield a unique description and the sequence + def each_gene_seq + each_gene do | id, reclist, component | + if component + sequence = @sequencelist[component.seqname] + # p sequence + if sequence + yield description(id,component,reclist), assemble(sequence,component.start,reclist) + else + error "No sequence information for",id + end + end + end end # Yield a unique description and the sequence def each_mRNA_seq each_mRNA do | id, reclist, component |