# Service adapter plug-in.
#
# PURPOSE: Includes "cited by", "similar articles" and "more by these authors"
# links from Scopus.
#
# LIMITATIONS: You must be a Scopus customer for the generated links to work
# for your users at all! Off-campus users should probably be going through
# EZProxy, see the EZProxy plug-in.
# Must find a match in Scopus, naturally. "cited by" will only
# be included if Scopus has non-0 "cited by" links. But there's no good way
# to precheck similar/more-by for this, so they are provided blind and may
# result in 0 hits. You can turn them off if you like, with @include_similar
# and @include_more_by_authors.
# Abstracts are not used because it seems to violate Scopus terms of service
# to use them.
#
# REGISTERING: Register for a Scopus API key at:
# http://www.developers.elsevier.com/action/devprojects?pageOrigin=cmsPage&zone=topNavBar
# Look for the "Register a new site" button at the bottom right of the page.
#
# For the second Scopus API, you theoretically need a Scopus "PartnerID" and
# corresponding "release number", in @partner_id and @scopus_release.
# There's no real easy way to get one. Scopus says:
# "To obtain a partner ID or release number, contact your nearest regional
# Scopus office. A list of Scopus contacts is available at
# http://www.info.scopus.com/contactus/index.shtml"
# Bah! But fortunately, using the "partnerID" assigned to the Scopus JSON
# API, 65, _seems_ to work, and is coded here as the default. You could try
# going with that. When you register a partnerID, you also get a 'salt key',
# which is currently not used by this code, but @link_salt_key is reserved
# for it in case functionality added later needs it.
#
# SCOPUS USEFUL URLS:
#
# api key register: http://www.developers.elsevier.com/action/devprojects?pageOrigin=cmsPage&zone=topNavBar
#
# 'content policies' terms of use: http://www.developers.elsevier.com/cms/content-apis
#
# API overview docs: http://www.developers.elsevier.com/cms/content-apis
#
# Various other api docs? Confused myself as to organization here.
#
# * http://www.developers.elsevier.com/devcms/content-api-search-request
# * http://www.developers.elsevier.com/devcms/content/search-fields-overview
# * http://api.elsevier.com/content/search/#d0n14606
#
# Some API recommendations for federated search: http://www.developers.elsevier.com/cms/restful-api-federated-search
#
class Scopus2 < Service
  require 'umlaut_http'
  require 'nokogiri'

  include ActionView::Helpers::SanitizeHelper
  include MetadataHelper
  include UmlautHttp

  required_config_params :api_key
  attr_accessor :scopus_search_base

  # Lists the ServiceTypeValues this service is configured to produce,
  # driven by the @include_* flags.
  #
  # Note: @include_abstract is given no default in #initialize and #handle
  # never adds an abstract response, so that entry only appears if
  # configuration sets the flag.
  def service_types_generated
    types = []
    types.push(ServiceTypeValue[:cited_by]) if @include_cited_by
    types.push(ServiceTypeValue[:abstract]) if @include_abstract
    types.push(ServiceTypeValue[:similar]) if @include_similar
    types.push(ServiceTypeValue[@more_by_authors_type]) if @include_more_by_authors

    return types
  end

  # Sets defaults for every setting, then hands config to the superclass
  # (presumably so that values in config override these defaults -- the
  # superclass is not visible here).
  def initialize(config)
    # defaults
    @display_name = "Scopus"
    # Optional value sent as the HTTP Referer header on API calls; nil
    # means no Referer header is sent.
    @registered_referer = nil
    @scopus_search_base = 'http://api.elsevier.com/content/search/index:SCOPUS'

    @include_cited_by = true
    @include_similar = true
    @include_more_by_authors = true
    @more_by_authors_type = "similar"

    @inward_cited_by_url = "http://www.scopus.com/scopus/inward/citedby.url"
    #@partner_id = "E5wmcMWC"
    @partner_id = 65
    @link_salt_key = nil
    @scopus_release = "R6.0.0"

    # Scopus offers two algorithms for finding similar items.
    # This variable can be:
    # "key" => keyword based similarity
    # "ref" => reference based similarity (cites similar refs?) Seems to
    #          offer 0 hits quite often, so we use keyword instead.
    # "aut" => author. More docs by same authors. Incorporated as a
    #          separate link usually.
    @more_like_this_type = "key"
    @inward_more_like_url = "http://www.scopus.com/scopus/inward/mlt.url"

    @credits = {
      @display_name => "http://www.scopus.com/home.url"
    }

    super(config)
  end

  # Namespace prefix => URI map used for every XPath query against the
  # Scopus XML response. Memoized.
  def xml_namespaces
    @xml_namespaces ||= {
      "atom"       => "http://www.w3.org/2005/Atom",
      "dc"         => "http://purl.org/dc/elements/1.1/",
      "opensearch" => "http://a9.com/-/spec/opensearch/1.1/",
      "prism"      => "http://prismstandard.org/namespaces/basic/2.0/"
    }
  end

  # Looks the request's citation up in Scopus and, on a hit, adds
  # "cited by", "similar" and "more by these authors" responses according
  # to the @include_* flags.
  #
  # Returns the result of request.dispatched: marked successful when no
  # query could be built, when there are zero hits, or after responses are
  # added; marked DispatchedService::FailedFatal (with an exception
  # describing the problem) when Scopus reports an error.
  def handle(request)
    query = scopus_query(request)

    # we can't make a good query, nevermind.
    return request.dispatched(self, true) if query.blank?

    url = scopus_url(query)

    # Make the call.
    headers = { "Accept" => "application/xml" }
    headers["Referer"] = @registered_referer if @registered_referer

    response = http_fetch(url, :headers => headers, :raise_on_http_error_code => false)

    unless response.kind_of?(Net::HTTPSuccess)
      e = StandardError.new(http_error_message(response, url))
      return request.dispatched(self, DispatchedService::FailedFatal, e)
    end

    xml = Nokogiri::XML(response.body)

    # Take the first hit from scopus's results, hope they relevancy ranked it
    # well. For DOI/pmid search, there should ordinarily be only one hit!
    first_hit = xml.at_xpath("//atom:entry[1]", xml_namespaces)

    # Weirdly, a zero-hit result has one <entry> containing an <error>
    # (sic). Could other kinds of errors be reported that way too? Maybe.
    # Better check just in case, ugh.
    if first_hit && (error = first_hit.at_xpath("./atom:error", xml_namespaces))
      if error.text == "Result set was empty"
        # Just zero hits, no big deal, but nothing to do.
        return request.dispatched(self, true)
      else
        # real error, log it.
        e = StandardError.new("Scopus returned error: #{error.text}: scopus query: #{url}")
        return request.dispatched(self, DispatchedService::FailedFatal, e)
      end
    end

    if first_hit
      try_add_cited_by_response(first_hit, request) if @include_cited_by

      # Pre-checking these links for actual hits (see #check_for_hits) is
      # not currently working and is disabled, so they are added blind and
      # without a hit count in the label.
      if @include_similar
        add_more_like_this_response(
          request,
          more_like_this_url(first_hit),
          ServiceTypeValue[:similar].display_name_pluralize.downcase.capitalize)
      end

      if @include_more_by_authors
        add_more_like_this_response(
          request,
          more_like_this_url(first_hit, :type => "aut"),
          "More from these authors")
      end
    end

    return request.dispatched(self, true)
  end

  # Returns a scopus advanced search query intended to find the exact
  # known item identified by this citation, or nil if the citation does
  # not carry enough metadata to build one.
  #
  # NOT uri-escaped yet, make sure to uri-escape before putting it in a uri
  # param!
  #
  # Will try to use DOI, PMID or ISBN if available. Otherwise
  # will use issn (or, failing that, journal title) plus vol/iss/start page
  # if all are available.
  def scopus_query(request)
    if (doi = get_doi(request.referent))
      return "DOI(#{phrase(doi)})"
    elsif (pmid = get_pmid(request.referent))
      return "PMID(#{phrase(pmid)})"
    elsif (isbn = get_isbn(request.referent))
      # I don't think scopus has a lot of ISBN-holding citations, but
      # it allows search so we might as well try.
      return "ISBN(#{phrase(isbn)})"
    else
      # Okay, we're going to try to do it on issn/vol/issue/page.
      # If we don't have issn, we'll reluctantly use journal title
      # (damn you google scholar).
      metadata = request.referent.metadata
      issn = request.referent.issn
      if (issn || !metadata['jtitle'].blank?) &&
         !metadata['volume'].blank? &&
         !metadata['issue'].blank? &&
         !metadata['spage'].blank?
        query = "VOLUME(#{phrase(metadata['volume'])}) AND ISSUE(#{phrase(metadata['issue'])}) AND PAGEFIRST(#{phrase(metadata['spage'])}) "
        if issn
          query += " AND (ISSN(#{phrase(issn)}) OR EISSN(#{phrase(issn)}))"
        else
          query += " AND EXACTSRCTITLE(#{phrase(metadata['jtitle'])})"
        end
        return query
      end
    end

    return nil
  end

  # Builds the search API URL for an (unescaped) query string; both the
  # API key and the query are CGI-escaped here.
  def scopus_url(query)
    "#{@scopus_search_base}?apiKey=#{CGI.escape @api_key}&query=#{CGI.escape query}"
  end

  # backslash escapes any double quotes, and embeds string in scopus
  # phrase search double quotes. Does NOT uri-escape.
  # The argument is converted with to_s first, so non-String values
  # (e.g. a numeric volume) do not raise.
  def phrase(str)
    '"' + str.to_s.gsub('"', '\\"') + '"'
  end

  # Input is the Nokogiri node for a single atom:entry hit from the Scopus
  # XML response. Adds a "cited by" response to the request when the hit
  # reports a citedby-count of at least 1; otherwise does nothing.
  def try_add_cited_by_response(result, request)
    # While scopus provides an "inwardurl" in the results, this just takes
    # us to the record detail page. We actually want to go RIGHT to the
    # list of cited-by items. So we create our own, based on Scopus's
    # reverse engineered predictable URLs.
    count_str = result.at_xpath("atom:citedby-count/text()", xml_namespaces).to_s
    count_i = count_str.to_i

    return if count_i < 1

    # Singular label for exactly one citation, plural otherwise.
    label =
      if count_i == 1
        ServiceTypeValue[:cited_by].display_name.downcase.capitalize
      else
        ServiceTypeValue[:cited_by].display_name_pluralize.downcase.capitalize
      end

    link = cited_by_url(result)

    request.add_service_response(
      :service => self,
      :display_text => "#{count_str} #{label}",
      :count => count_i,
      :url => link,
      :service_type_value => :cited_by)
  end

  # Text of the atom:eid element of a hit ("" when the element is absent).
  def eid_from_hit(result)
    result.at_xpath("atom:eid/text()", xml_namespaces).to_s
  end

  # Direct link to the Scopus list of documents citing the given hit.
  def cited_by_url(result)
    eid = CGI.escape(eid_from_hit(result))
    #return "#{@scopus_cited_by_base}?eid=#{eid}&src=s&origin=recordpage"
    # Use the new scopus direct link format!
    return "#{@inward_cited_by_url}?partnerID=#{@partner_id}&rel=#{@scopus_release}&eid=#{eid}"
  end

  # Direct link to the Scopus "more like this" list for the given hit.
  # options[:type] selects the mltType ("key", "ref" or "aut") and defaults
  # to @more_like_this_type. The caller's options hash is not modified.
  def more_like_this_url(result, options = {})
    type = options[:type] || @more_like_this_type

    eid = CGI.escape(eid_from_hit(result))

    return "#{@inward_more_like_url}?partnerID=#{@partner_id}&rel=#{@scopus_release}&eid=#{eid}&mltType=#{type}"
  end

  # NOT currently working (and not called by #handle). Scopus doesn't make
  # this easy.
  # Takes a scopus direct url for which we're not sure if there will be results
  # or not, and requests it and html screen-scrapes to get hit count. (We
  # can conveniently find this just in the html at least).
  # Works for cited_by and more_like_this searches at present.
  # May break if Scopus changes their html title!
  #
  # Returns the hit count as an Integer, or 0 when no count can be found.
  def check_for_hits(url)
    body = http_fetch(url).body
    html = Nokogiri::HTML(body)

    title_node = html.at('title')
    return 0 unless title_node

    # title is "X documents" (or 'Documents') if there are hits.
    # It's annoyingly "Search Error" if there are either 0 hits, or
    # if there was an actual error. So we can't easily log actual
    # errors, sorry.
    match = /^\s*(\d+)?\s+document/.match(title_node.inner_text.downcase)
    hits = match && match[1]

    hits ? hits.to_i : 0
  end

  private

  # Builds the message for a non-success HTTP response. Scopus sometimes
  # puts details in an XML <service-error> body; when present, the text of
  # its statusCode and statusText elements is included.
  def http_error_message(response, url)
    xml = begin
      Nokogiri::XML(response.body)
    rescue StandardError
      # Body could not be parsed; report the HTTP status alone.
      nil
    end

    code, message = nil, nil
    if xml && (error = xml.at_xpath("./service-error"))
      code_node = error.at_xpath("./status/statusCode")
      message_node = error.at_xpath("./status/statusText")
      # Use the element text rather than the node itself, which would
      # interpolate as XML markup.
      code = code_node.text if code_node
      message = message_node.text if message_node
    end

    "Scopus returned error HTTP status #{response.code}: #{code}: #{message}: scopus query: #{url}"
  end

  # Adds one "more like this" style link to the request. When a hit count
  # is supplied it is prefixed to the label; with no count the label is
  # used on its own (no leading space).
  #
  # NOTE(review): both the "similar" and "more by these authors" links are
  # registered as :similar here, while #service_types_generated advertises
  # @more_by_authors_type for the latter -- confirm whether a non-default
  # @more_by_authors_type should be honored here too.
  def add_more_like_this_response(request, url, label, hits = nil)
    display_text = hits ? "#{hits} #{label}" : label

    request.add_service_response(
      :service => self,
      :display_text => display_text,
      :url => url,
      :service_type_value => :similar)
  end
end