lib/service_adaptors/sfx.rb in umlaut-3.0.2 vs lib/service_adaptors/sfx.rb in umlaut-3.0.3
- old
+ new
@@ -321,12 +321,15 @@
response_data[:coverage] = coverage if coverage
# Sfx metadata we want
response_data[:sfx_base_url] = @base_url
response_data[:sfx_obj_index] = sfx_obj_index + 1 # sfx is 1 indexed
- response_data[:sfx_target_index] = target_index + 1
- response_data[:sfx_request_id] = (perl_data/"//hash/item[@key='sfx.request_id']").first.inner_text
+ response_data[:sfx_target_index] = target_index + 1
+ # sometimes the sfx.request_id is missing, go figure.
+ if request_id = (perl_data/"//hash/item[@key='sfx.request_id']").first
+ response_data[:sfx_request_id] = request_id.inner_text
+ end
response_data[:sfx_target_service_id] = target_service_id
response_data[:sfx_target_name] = sfx_target_name
# At url-generation time, the request isn't available to us anymore,
# so we better store this citation info here now, since we need it
# for sfx click passthrough
@@ -451,10 +454,16 @@
doc.search('perldata/hash/item').each do |item|
key = item['key'].to_s
value = item.inner_text
+
+ # SFX sometimes returns invalid UTF-8 (is it really ISO 8859? Is it
+ # predictable? Who knows). If it's not valid, it'll cause all
+ # sorts of problems later. So if the value is not valid, we're just
+ # going to skip this item, sorry.
+ next unless value.valid_encoding?
# Some normalization. SFX uses rft.year, which is not actually
# legal. Stick it in rft.date instead.
key = "rft.date" if key == "rft.year"
@@ -480,10 +489,10 @@
end
# But this still has HTML entities in it sometimes. Now we've
# got to decode THAT.
# TODO: Are we sure we need to do this? We need an example
- # from SFX result to test, it's potentially expensive.
+ # from SFX result to test, it's potentially expensive.
value = html_ent_coder.decode(value)
# object_type? Fix that to be the right way.
if (prefix=='rft') && (key=='object_type')
co.referent.set_format( value.downcase )