lib/service_adaptors/sfx.rb in umlaut-3.0.2 vs lib/service_adaptors/sfx.rb in umlaut-3.0.3

- old
+ new

@@ -321,12 +321,15 @@ response_data[:coverage] = coverage if coverage # Sfx metadata we want response_data[:sfx_base_url] = @base_url response_data[:sfx_obj_index] = sfx_obj_index + 1 # sfx is 1 indexed - response_data[:sfx_target_index] = target_index + 1 - response_data[:sfx_request_id] = (perl_data/"//hash/item[@key='sfx.request_id']").first.inner_text + response_data[:sfx_target_index] = target_index + 1 + # sometimes the sfx.request_id is missing, go figure. + if request_id = (perl_data/"//hash/item[@key='sfx.request_id']").first + response_data[:sfx_request_id] = request_id.inner_text + end response_data[:sfx_target_service_id] = target_service_id response_data[:sfx_target_name] = sfx_target_name # At url-generation time, the request isn't available to us anymore, # so we better store this citation info here now, since we need it # for sfx click passthrough @@ -451,10 +454,16 @@ doc.search('perldata/hash/item').each do |item| key = item['key'].to_s value = item.inner_text + + # SFX sometimes returns invalid UTF8 (is it really ISO 8859? Is it + # predictable? Who knows.) If it's not valid, it'll cause all + # sorts of problems later. So if it's not valid, we're just + # going to ignore it, sorry. + next unless value.valid_encoding? # Some normalization. SFX uses rft.year, which is not actually # legal. Stick it in rft.date instead. key = "rft.date" if key == "rft.year" @@ -480,10 +489,10 @@ end # But this still has HTML entities in it sometimes. Now we've # got to decode THAT. # TODO: Are we sure we need to do this? We need an example - # from SFX result to test, it's potentially expensive. + # from SFX result to test, it's potentially expensive. value = html_ent_coder.decode(value) # object_type? Fix that to be the right way. if (prefix=='rft') && (key=='object_type') co.referent.set_format( value.downcase )