app/controllers/search_methods/sfx4.rb in umlaut-3.0.5 vs app/controllers/search_methods/sfx4.rb in umlaut-3.1.0.pre1

- old
+ new

@@ -1,83 +1,49 @@ require 'nokogiri' - module SearchMethods module Sfx4 include MetadataHelper # for normalize_lccn - + protected - - # used by umlaut:load_sfx_urls task. Kind of hacky way of trying to extract - # target URLs from SFX4. - def self.fetch_sfx_urls(sfx_global_db = "sfxglb41") - connection = SfxDb::Object.connection + # Class method for the module that gets called by the umlaut:load_sfx_urls task. + # Determines whether we should attempt to fetch SFX urls. + # Will probably be deprecated in the near future. + def self.fetch_urls? + sfx4_base.connection_configured? + end - # Crazy crazy URLs to try to find PARSE_PARAMS in Sfx4 db that have a period in - # them, so they look like they might be URLs. Parse params could be at target service - # level, or at portfolio level; and could be in local overrides or in global kb. - # This is crazy crazy SQL to get this, sorry. Talking directly to SFX db isn't - # a great idea, but best way we've found to get this for now. Might make more - # sense to try to use the (very very slow) SFX export in the future instead. - sql = %{ - SELECT - COALESCE(LCL_SERVICE_LINKING_INFO.PARSE_PARAM,KB_TARGET_SERVICES.PARSE_PARAM) PARSE_PARAM - FROM - LCL_TARGET_INVENTORY - JOIN #{sfx_global_db}.KB_TARGET_SERVICES - ON KB_TARGET_SERVICES.TARGET_ID = LCL_TARGET_INVENTORY.TARGET_ID - JOIN LCL_SERVICE_INVENTORY - ON LCL_TARGET_INVENTORY.TARGET_ID = LCL_SERVICE_INVENTORY.TARGET_ID - LEFT OUTER JOIN LCL_SERVICE_LINKING_INFO - ON LCL_SERVICE_INVENTORY.TARGET_SERVICE_ID = LCL_SERVICE_LINKING_INFO.TARGET_SERVICE_ID - WHERE - ( LCL_SERVICE_LINKING_INFO.PARSE_PARAM like '%.%' OR - KB_TARGET_SERVICES.PARSE_PARAM like '%.%' ) - AND - LCL_SERVICE_INVENTORY.ACTIVATION_STATUS='ACTIVE' - AND - LCL_TARGET_INVENTORY.ACTIVATION_STATUS = 'ACTIVE' - - UNION - -- object portfolio parse param version - SELECT - COALESCE(LCL_OBJECT_PORTFOLIO_LINKING_INFO.PARSE_PARAM, KB_OBJECT_PORTFOLIOS.PARSE_PARAM) PARSE_PARAM - FROM - #{sfx_global_db}.KB_OBJECT_PORTFOLIOS - JOIN LCL_SERVICE_INVENTORY - ON KB_OBJECT_PORTFOLIOS.TARGET_SERVICE_ID = LCL_SERVICE_INVENTORY.TARGET_SERVICE_ID - JOIN LCL_OBJECT_PORTFOLIO_INVENTORY - ON KB_OBJECT_PORTFOLIOS.OP_ID = LCL_OBJECT_PORTFOLIO_INVENTORY.OP_ID - left outer join LCL_OBJECT_PORTFOLIO_LINKING_INFO - ON KB_OBJECT_PORTFOLIOS.OP_ID = LCL_OBJECT_PORTFOLIO_LINKING_INFO.OP_ID - WHERE - ( KB_OBJECT_PORTFOLIOS.PARSE_PARAM like '%.%' OR - LCL_OBJECT_PORTFOLIO_LINKING_INFO.PARSE_PARAM like '%.%' ) - AND LCL_OBJECT_PORTFOLIO_INVENTORY.ACTIVATION_STATUS = 'ACTIVE' - AND LCL_SERVICE_INVENTORY.ACTIVATION_STATUS='ACTIVE' - } - - results = connection.select_all(sql) - - urls = [] - results.each do |line| - param_string = line["PARSE_PARAM"] - - # Try to get things that look sort of like URLs out. Brutal force, - # sorry. - url_re = Regexp.new('(https?://\S+\.\S+)(\s|$)') - urls.concat( param_string.scan( url_re ).collect {|matches| matches[0]} ) - end - urls.uniq! - return urls + # Class method for the module that gets called by the umlaut:load_sfx_urls task. + # Kind of hacky way of trying to extract target URLs from SFX4. + # Will probably be deprecated in the near future. + def self.fetch_urls + sfx4_base.fetch_urls end - + # Class method for the module. + # Returns the SFX4 base class in order to establish a connection. + def self.sfx4_base + # Need to do this convoluted Module.const_get so that we find the + # correct class. Otherwise the module looks locally and can't find it. + Module.const_get(:Sfx4).const_get(:Local).const_get(:Base) + end + + # Instance method that returns the SFX4 AzTitle class for this search method. + # Can be overridden by search methods that want to include this one. + def az_title_klass + # Need to do this convoluted Module.const_get so that we find the + # correct class. Otherwise the module looks locally and can't find it. + Module.const_get(:Sfx4).const_get(:Local).const_get(:AzTitle) + end + + # Instance method that returns the SFX4 DB connection for this search method. + def sfx4_db_connection + az_title_klass.connection + end + # Needs to return ContextObjects def find_by_title connection = sfx4_db_connection - - query_match_clause = case search_type_param when "contains" terms = title_query_param.split(" ") #SFX4 seems to ignore 'the' or 'a' on the front, so we will too. if (["the", "a"].include? terms[0]) @@ -86,11 +52,10 @@ # Then make each term required, but stemmed. Seems to match SFX4, # and more importantly give us decent results. query = terms.collect do |term| "+" + connection.quote_string(term) + "*" end.join(" ") - "MATCH (TS.TITLE_SEARCH) AGAINST ('#{query}' IN BOOLEAN MODE)" when "begins" # For 'begins', searching against TITLE itself rather than TITLE_SEARCH gives us # results more like SFX4 native, without so many 'also known as' titles confusing # things. @@ -100,38 +65,32 @@ "( TS.TITLE_SEARCH = '#{connection.quote_string(title_query_param)}' OR T.TITLE_DISPLAY = '#{connection.quote_string(title_query_param)}' OR T.TITLE_SORT = '#{connection.quote_string(title_query_param)}' )" end.upcase - from_where_clause = %{ FROM AZ_TITLE T, AZ_TITLE_SEARCH TS WHERE TS.AZ_TITLE_ID = T.AZ_TITLE_ID AND #{query_match_clause} AND T.AZ_PROFILE = '#{connection.quote_string(sfx_az_profile)}' } - statement = %{ SELECT DISTINCT T.OBJECT_ID #{from_where_clause} ORDER BY T.SCRIPT DESC, T.TITLE_SORT LIMIT #{batch_size.to_i} OFFSET #{(batch_size * (page - 1)).to_i} - } - + } # do the count - total_hits = SfxDb::Object.count_by_sql( - "SELECT COUNT(DISTINCT(T.OBJECT_ID)) #{from_where_clause}" - ) - + total_hits = az_title_klass.count_by_sql( + "SELECT COUNT(DISTINCT(T.OBJECT_ID)) #{from_where_clause}") object_ids = connection.select_all(statement).collect {|i| i.values.first} - - sql = SfxDb::Object.send(:sanitize_sql_array, + sql = az_title_klass.send(:sanitize_sql_array, [%{ SELECT EI.OBJECT_ID, T.TITLE_DISPLAY, EI.EXTRA_INFO_XML FROM AZ_TITLE T @@ -141,45 +100,36 @@ T.AZ_PROFILE=? AND EI.OBJECT_ID IN (?) ORDER BY T.SCRIPT DESC, T.TITLE_SORT }, - sfx_az_profile, - object_ids]) - + sfx_az_profile, object_ids]) title_objects = connection.select_all(sql) - # Make em into context objects context_objects = title_objects.collect do |sfx_obj| ctx = OpenURL::ContextObject.new # Start out wtih everything in search, to preserve date/vol/etc ctx.import_context_object( context_object_from_params ) - extra_info_xml = Nokogiri::XML( sfx_obj["EXTRA_INFO_XML"] ) - # Put SFX object id in rft.object_id, that's what SFX does. ctx.referent.set_metadata('object_id', sfx_obj["OBJECT_ID"].to_s ) ctx.referent.set_metadata("jtitle", sfx_obj["TITLE_DISPLAY"] || "Unknown Title") - issn = extra_info_xml.search("item[key=issn]").text isbn = extra_info_xml.search("item[key=isbn]").text - # LCCN is stored corrupted in xml in SFX db, without prefix like "sn" that # is a significant part of lccn. Our reverse engineering of SFX failed, # apparently there's a workaround in SFX app code. Forget it, bail # don't try to use lccn. #lccn = extra_info_xml.search("item[key=lccn]").text - ctx.referent.set_metadata("issn", issn ) unless issn.blank? ctx.referent.set_metadata("isbn", isbn) unless isbn.blank? #ctx.referent.add_identifier("info:lccn/#{normalize_lccn(lccn)}") unless lccn.blank? - ctx end return [context_objects, total_hits] end - + # Used for clicks on A, B, C, 0-9, etc. def find_by_group connection = sfx4_db_connection from_where_clause = %{ FROM @@ -192,71 +142,52 @@ T.AZ_PROFILE= '#{connection.quote_string(sfx_az_profile)}' AND #{sfx4_quoted_letter_group_condition} } count_sql = %{ SELECT count(*) - #{from_where_clause} } - fetch_sql = %{ SELECT EI.OBJECT_ID, T.TITLE_DISPLAY, EI.EXTRA_INFO_XML - #{from_where_clause} - ORDER BY T.SCRIPT DESC, T.TITLE_SORT LIMIT #{batch_size.to_i} OFFSET #{(batch_size * (page - 1)).to_i} } - - total_count = SfxDb::Object.count_by_sql( count_sql ) + total_count = az_title_klass.count_by_sql( count_sql ) context_objects = sfx4_db_to_ctxobj( connection.select_all(fetch_sql) ) - return [context_objects, total_count] end - - def sfx4_db_connection - SfxDb::Object.connection - end - + def sfx4_quoted_letter_group_condition - " AZ_LETTER_GROUP.AZ_LETTER_GROUP_NAME " + - case params[:id] - when "0-9" - " IN ('0','1','2','3','4','5','6','7','8','9')" - when /^Other/i - "= 'Others'" - else - "= '#{sfx4_db_connection.quote_string(params[:id].upcase)}'" - end + " AZ_LETTER_GROUP.AZ_LETTER_GROUP_NAME " + case params[:id] + when "0-9" + " IN ('0','1','2','3','4','5','6','7','8','9')" + when /^Other/i + "= 'Others'" + else + "= '#{sfx4_db_connection.quote_string(params[:id].upcase)}'" + end end - + def sfx4_db_to_ctxobj(title_rows) title_rows.collect do |sfx_obj| ctx = OpenURL::ContextObject.new # Start out wtih everything in search, to preserve date/vol/etc ctx.import_context_object( context_object_from_params ) - extra_info_xml = Nokogiri::XML( sfx_obj["EXTRA_INFO_XML"] ) - # Put SFX object id in rft.object_id, that's what SFX does. ctx.referent.set_metadata('object_id', sfx_obj["OBJECT_ID"]) ctx.referent.set_metadata("jtitle", sfx_obj["TITLE_DISPLAY"] || "Unknown Title") - issn = extra_info_xml.search("item[key=issn]").text isbn = extra_info_xml.search("item[key=isbn]").text lccn = extra_info_xml.search("item[key=lccn]").text - ctx.referent.set_metadata("issn", issn ) unless issn.blank? ctx.referent.set_metadata("isbn", isbn) unless isbn.blank? ctx.referent.add_identifier("info:lccn/#{normalize_lccn(lccn)}") unless lccn.blank? - ctx end - end - end end -