lib/bio/db/embl/uniprotkb.rb in bio-2.0.4 vs lib/bio/db/embl/uniprotkb.rb in bio-2.0.5
- old
+ new
@@ -528,26 +528,47 @@
#
# OH NCBI_TaxID=TaxID; HostName.
# http://br.expasy.org/sprot/userman.html#OH_line
#
# Parses the OH line(s) and memoizes the result in @data['OH'].
#
# Returns an Array of Hashes, one per 'NCBI_TaxID=' marker found, each of
# the form {'NCBI_TaxID' => taxid, 'HostName' => host_name}, where taxid
# is the digit string that followed the marker and host_name is the free
# text after the ';' with one trailing period removed (nil when no text
# followed).
#
# Raises ArgumentError when the OH text does not fit the expected format.
#
# Diff note: the removed ('-') lines split the text on ". " and matched
# each chunk with regexes; the added ('+') lines tokenize on the
# 'NCBI_TaxID=<digits>;' marker instead and walk the resulting tokens.
def oh
# Only parse once; later calls return the cached value.
unless @data['OH']
- @data['OH'] = fetch('OH').split("\. ").map {|x|
- if x =~ /NCBI_TaxID=(\d+);/
- taxid = $1
- else
- raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):",
- $!, "\n", get('OH'), "\n"].join
-
+ oh = []
# The split pattern has three capture groups, so String#split keeps the
# marker pieces ('NCBI_TaxID=', the digits, ';') in the returned array
# between the free-text pieces.
+ a = fetch('OH').split(/(NCBI\_TaxID\=)(\d+)(\;)/)
# catch yields the reason symbol passed to throw on a format error, or
# the block's last value (nil) when parsing succeeds.
+ t = catch :error do
+ taxid = nil
+ host_name = nil
+ while x = a.shift
+ x = x.to_s.strip
+ case x
+ when ''
+ next
+ when 'NCBI_TaxID='
# A new marker starts a new record: flush the pending one first.
+ if taxid then
+ oh.push({'NCBI_TaxID' => taxid, 'HostName' => host_name})
+ taxid = nil
+ host_name = nil
+ end
# The next two tokens must be the digits and the ';' separator.
+ taxid = a.shift
+ throw :error, :missing_semicolon if a.shift != ';'
+ else
# Any other non-empty token is host-name text; a second such token
# without an intervening marker is treated as an error.
+ throw :error, :missing_taxid if host_name
+ host_name = x
+ host_name.sub!(/\.\z/, '')
+ end
+ end #while x...
# Flush the final record; host text with no taxid at all is an error.
+ if taxid then
+ oh.push({'NCBI_TaxID' => taxid, 'HostName' => host_name})
+ elsif host_name then
+ throw :error, :missing_taxid_last
end
- if x =~ /NCBI_TaxID=\d+; (.+)/
- host_name = $1
- host_name.sub!(/\.$/, '')
- else
- host_name = nil
- end
- {'NCBI_TaxID' => taxid, 'HostName' => host_name}
- }
+ nil
+ end #t = catch...
# NOTE(review): the reason symbol held in t is not included in the
# message below; $! is interpolated instead, and throw/catch does not
# appear to set it -- confirm whether t was meant to be reported.
+ if t then
+ raise ArgumentError,
+ ["Error: Invalid OH line format (#{self.entry_id}):",
+ $!, "\n", get('OH'), "\n"].join
+ end
+ @data['OH'] = oh
end
@data['OH']
end
@@ -920,10 +941,11 @@
end
end
def cc_alternative_products(data)
+ return nil unless data
ap = data.join('')
return ap unless ap
# Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequence]+
tmp = {'Event' => "", 'Named isoforms' => "", 'Comment' => "",
@@ -958,10 +980,11 @@
end
private :cc_alternative_products_variants
def cc_biophysiochemical_properties(data)
+ return nil unless data
data = data[0]
hash = {'Absorption' => {},
'Kinetic parameters' => {},
'pH dependence' => "",
@@ -993,19 +1016,21 @@
end
private :cc_biophysiochemical_properties
# Concatenates the elements of +data+ into a single String, with no
# separator between them.
#
# Returns nil when +data+ is nil or false (guard added on the '+' line of
# this diff); otherwise returns the joined String.
def cc_caution(data)
+ return nil unless data
data.join('')
end
private :cc_caution
# Returns the contents of a line of the CC INTERACTION section.
#
# CC P46527:CDKN1B; NbExp=1; IntAct=EBI-359815, EBI-519280;
def cc_interaction(data)
+ return nil unless data
str = data.join('')
it = str.scan(/(.+?); NbExp=(.+?); IntAct=(.+?);/)
it.map {|ent|
ent.map! {|x| x.strip }
if ent[0] =~ /^(.+):(.+)/
@@ -1057,18 +1082,20 @@
end
private :cc_mass_spectrometry
# Splits the first element of +data+ into its component terms.
#
# Each element has one period at the end of a line removed and is then
# split on "; ", " and " or ": "; only the result for the first element
# is returned (the trailing [0]).
#
# Returns nil when +data+ is nil or false (guard added on the '+' line of
# this diff), and also when +data+ is empty, since [][0] is nil.
def cc_pathway(data)
+ return nil unless data
data.map {|x| x.sub(/\.$/, '') }.map {|x|
x.split(/; | and |: /)
}[0]
end
private :cc_pathway
def cc_rna_editing(data)
+ return nil unless data
data = data.join('')
entry = {'Modified_positions' => [], 'Note' => ""}
if data =~ /Modified_positions=(.+?)(\.|;)/
entry['Modified_positions'] = $1.sub(/\.$/, '').split(', ')
else
@@ -1081,10 +1108,11 @@
end
private :cc_rna_editing
def cc_subcellular_location(data)
+ return nil unless data
data.map {|x|
x.split('. ').map {|y|
y.split('; ').map {|z|
z.sub(/\.$/, '')
}
@@ -1099,10 +1127,11 @@
# CC -!- WEB RESOURCE: Name=ResourceName[; Note=FreeText][; URL=WWWAddress].
# Old format:
# CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
#++
def cc_web_resource(data)
+ return nil unless data
data.map {|x|
entry = {'Name' => nil, 'Note' => nil, 'URL' => nil}
x.split(';').each do |y|
case y
when /(Name|Note)\=(.+)/
@@ -1233,13 +1262,17 @@
cur_ft = nil
cont = false
begin
ftlines.each do |line|
if /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/ =~ line
- cur_ft = [$1.to_s, # Feature Name
- "#{$2}#{$4}", # From
- $5.to_s, # To
- [] # Qualifiers
+ f_name = $1.to_s
+ f_from = "#{$2}#{$4}"
+ f_to = $5.to_s
+ f_to = f_from if f_to.empty?
+ cur_ft = [f_name, # Feature Name
+ f_from, # From
+ f_to, # To
+ [] # Qualifiers
]
table.push cur_ft
cont = false
elsif cont && /^FT {19}/ =~ line
str = $'