lib/relaton_calconnect/scrapper.rb in relaton-calconnect-1.13.0 vs lib/relaton_calconnect/scrapper.rb in relaton-calconnect-1.13.1

- old
+ new

@@ -3,32 +3,43 @@ DOMAIN = "https://standards.calconnect.org/".freeze SCHEME, HOST = DOMAIN.split(%r{:?/?/}) # DOMAIN = "http://127.0.0.1:4000/".freeze class << self - # papam hit [Hash] - # @return [RelatonOgc::OrcBibliographicItem] + # + # Parse document page + # + # @param hit [Hash] document hash + # + # @return [RelatonCalconnect::CcBibliographicItem] bibliographic item + # def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength links = array(hit["link"]) link = links.detect { |l| l["type"] == "rxl" } if link bib = fetch_bib_xml link["content"] update_links bib, links # XMLParser.from_xml bib_xml else + hit.delete "fetched" bib = RelatonCalconnect::CcBibliographicItem.from_hash doc_to_hash hit end bib.link.each do |l| l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host end bib end private - # @param url [String] - # @return [String] XML + # + # Fetch bibliographic item from XML source + # + # @param url [String] URL to fetch + # + # @return [RelatonCalconnect::CcBibliographicItem] bibliographic item + # def fetch_bib_xml(url) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength rxl = get_rxl url uri_rxl = rxl.at("uri[@type='rxl']") if uri_rxl uri_xml = rxl.xpath("//uri").to_xml @@ -59,10 +70,11 @@ array(doc["editorialgroup"]).each do |eg| tc = eg.delete("technical_committee") eg.merge!(tc) if tc end dtps = %w[CC CSD] - array(doc["docid"]).detect { |id| dtps.include? id["type"].upcase }["primary"] = true + did = array(doc["docid"]).detect { |id| dtps.include? id["type"].upcase } + did["primary"] = true if did doc end def update_links(bib, links) links.each do |l|