lib/bio/db/embl/uniprotkb.rb in bio-2.0.4 vs lib/bio/db/embl/uniprotkb.rb in bio-2.0.5

- old
+ new

@@ -528,26 +528,47 @@ # # OH NCBI_TaxID=TaxID; HostName. # http://br.expasy.org/sprot/userman.html#OH_line def oh unless @data['OH'] - @data['OH'] = fetch('OH').split("\. ").map {|x| - if x =~ /NCBI_TaxID=(\d+);/ - taxid = $1 - else - raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):", - $!, "\n", get('OH'), "\n"].join - + oh = [] + a = fetch('OH').split(/(NCBI\_TaxID\=)(\d+)(\;)/) + t = catch :error do + taxid = nil + host_name = nil + while x = a.shift + x = x.to_s.strip + case x + when '' + next + when 'NCBI_TaxID=' + if taxid then + oh.push({'NCBI_TaxID' => taxid, 'HostName' => host_name}) + taxid = nil + host_name = nil + end + taxid = a.shift + throw :error, :missing_semicolon if a.shift != ';' + else + throw :error, :missing_taxid if host_name + host_name = x + host_name.sub!(/\.\z/, '') + end + end #while x... + if taxid then + oh.push({'NCBI_TaxID' => taxid, 'HostName' => host_name}) + elsif host_name then + throw :error, :missing_taxid_last end - if x =~ /NCBI_TaxID=\d+; (.+)/ - host_name = $1 - host_name.sub!(/\.$/, '') - else - host_name = nil - end - {'NCBI_TaxID' => taxid, 'HostName' => host_name} - } + nil + end #t = catch... + if t then + raise ArgumentError, + ["Error: Invalid OH line format (#{self.entry_id}):", + $!, "\n", get('OH'), "\n"].join + end + @data['OH'] = oh end @data['OH'] end @@ -920,10 +941,11 @@ end end def cc_alternative_products(data) + return nil unless data ap = data.join('') return ap unless ap # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+ tmp = {'Event' => "", 'Named isoforms' => "", 'Comment' => "", @@ -958,10 +980,11 @@ end private :cc_alternative_products_variants def cc_biophysiochemical_properties(data) + return nil unless data data = data[0] hash = {'Absorption' => {}, 'Kinetic parameters' => {}, 'pH dependence' => "", @@ -993,19 +1016,21 @@ end private :cc_biophysiochemical_properties def cc_caution(data) + return nil unless data data.join('') end private :cc_caution # returns conteins in a line of the CC INTERACTION section. # # CC P46527:CDKN1B; NbExp=1; IntAct=EBI-359815, EBI-519280; def cc_interaction(data) + return nil unless data str = data.join('') it = str.scan(/(.+?); NbExp=(.+?); IntAct=(.+?);/) it.map {|ent| ent.map! {|x| x.strip } if ent[0] =~ /^(.+):(.+)/ @@ -1057,18 +1082,20 @@ end private :cc_mass_spectrometry def cc_pathway(data) + return nil unless data data.map {|x| x.sub(/\.$/, '') }.map {|x| x.split(/; | and |: /) }[0] end private :cc_pathway def cc_rna_editing(data) + return nil unless data data = data.join('') entry = {'Modified_positions' => [], 'Note' => ""} if data =~ /Modified_positions=(.+?)(\.|;)/ entry['Modified_positions'] = $1.sub(/\.$/, '').split(', ') else @@ -1081,10 +1108,11 @@ end private :cc_rna_editing def cc_subcellular_location(data) + return nil unless data data.map {|x| x.split('. ').map {|y| y.split('; ').map {|z| z.sub(/\.$/, '') } @@ -1099,10 +1127,11 @@ # CC -!- WEB RESOURCE: Name=ResourceName[; Note=FreeText][; URL=WWWAddress]. # Old format: # CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress]. #++ def cc_web_resource(data) + return nil unless data data.map {|x| entry = {'Name' => nil, 'Note' => nil, 'URL' => nil} x.split(';').each do |y| case y when /(Name|Note)\=(.+)/ @@ -1233,13 +1262,17 @@ cur_ft = nil cont = false begin ftlines.each do |line| if /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/ =~ line - cur_ft = [$1.to_s, # Feature Name - "#{$2}#{$4}", # From - $5.to_s, # To - [] # Qualifiers + f_name = $1.to_s + f_from = "#{$2}#{$4}" + f_to = $5.to_s + f_to = f_from if f_to.empty? + cur_ft = [f_name, # Feature Name + f_from, # From + f_to, # To + [] # Qualifiers ] table.push cur_ft cont = false elsif cont && /^FT {19}/ =~ line str = $'