Sha256: 07bc8982097e95eaf714ead8fa10bf6efbd54015729377f7c1fde7fdc6a9e692

Contents?: true

Size: 1.03 KB

Versions: 3

Compression:

Stored size: 1.03 KB

Contents

module RelatonCalconnect
  # Fetches RXL (XML) bibliographic records over HTTP and hands them to
  # XMLParser.
  module Scrapper
    # DOMAIN = "https://standards.calconnect.org/".freeze
    # Base URL that get_rxl prepends to every requested path.
    # NOTE(review): this points at a local host while the commented-out line
    # above names the public site - looks like a development leftover;
    # confirm which value is intended before release.
    DOMAIN = "http://127.0.0.1:4000/".freeze

    class << self
      # Builds a bibliographic item from a search hit.
      #
      # Picks the first entry of hit["link"] whose "type" is "rxl", downloads
      # the XML its "content" path points to and passes it to
      # XMLParser.from_xml.
      #
      # @param hit [Hash] search hit; hit["link"] is expected to be a
      #   collection of hashes with "type" and "content" keys
      # @return [Object, nil] the value returned by XMLParser.from_xml, or nil
      #   when the hit has no link of type "rxl" (or no "link" entry at all).
      #   NOTE(review): the previous annotation named
      #   RelatonOgc::OrcBibliographicItem, which looks copied from another
      #   gem - confirm the actual class.
      def parse_page(hit)
        # A hit without a "link" entry is treated like a hit without an RXL
        # link instead of raising NoMethodError on nil.
        links = hit["link"] || []
        link = links.detect { |l| l["type"] == "rxl" }
        return unless link

        XMLParser.from_xml fetch_bib_xml(link["content"])
      end

      private

      # Downloads the document at +url+ and serializes it to XML.
      #
      # When the document contains a <uri type="rxl"> element, the document
      # that element's text points to is downloaded instead, and all <uri>
      # elements of the first document are inserted before its first
      # <docidentifier> element.
      #
      # @param url [String] path relative to DOMAIN
      # @return [String] XML
      def fetch_bib_xml(url)
        rxl = get_rxl url
        uri_rxl = rxl.at("uri[@type='rxl']")
        return rxl.to_xml unless uri_rxl

        # Serialize the <uri> elements before +rxl+ is rebound to the second
        # document.
        uri_xml = rxl.xpath("//uri").to_xml
        rxl = get_rxl uri_rxl.text
        docid = rxl.at "//docidentifier"
        # Without a <docidentifier> there is no anchor to insert before;
        # return the second document unchanged rather than failing on nil.
        docid.add_previous_sibling uri_xml if docid
        rxl.to_xml
      end

      # Performs an HTTP GET for DOMAIN + path and parses the response body.
      #
      # @param path [String] path relative to DOMAIN
      # @return [Nokogiri::XML::Document]
      def get_rxl(path)
        resp = Faraday.get DOMAIN + path
        Nokogiri::XML resp.body
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
relaton-calconnect-0.1.2 lib/relaton_calconnect/scrapper.rb
relaton-calconnect-0.1.1 lib/relaton_calconnect/scrapper.rb
relaton-calconnect-0.1.0 lib/relaton_calconnect/scrapper.rb