Sha256: 07bc8982097e95eaf714ead8fa10bf6efbd54015729377f7c1fde7fdc6a9e692
Contents?: true
Size: 1.03 KB
Versions: 3
Compression:
Stored size: 1.03 KB
Contents
module RelatonCalconnect
  # Downloads CalConnect bibliographic records (RXL/XML) and passes the
  # resulting XML to XMLParser.
  module Scrapper
    # Base URL that every requested path is appended to.
    # To run against a local copy of the site, swap in the line below:
    # DOMAIN = "http://127.0.0.1:4000/".freeze
    DOMAIN = "https://standards.calconnect.org/".freeze

    class << self
      # Builds a bibliographic item from a search hit.
      #
      # @param hit [Hash] search hit; its "link" entry is expected to hold
      #   link hashes with "type" and "content" keys. A missing entry, nil,
      #   or a single link hash (instead of an array) are all tolerated.
      # @return [Object, nil] the value returned by XMLParser.from_xml, or nil
      #   when the hit has no link of type "rxl".
      #   NOTE(review): this was documented as RelatonOgc::OrcBibliographicItem,
      #   which looks copied from another gem -- confirm the actual class.
      def parse_page(hit)
        link = rxl_link hit
        return unless link

        XMLParser.from_xml fetch_bib_xml(link["content"])
      end

      private

      # Picks the first link of type "rxl" out of a hit.
      #
      # @param hit [Hash]
      # @return [Hash, nil]
      def rxl_link(hit)
        # Wrap + flatten normalizes nil, a lone Hash and an Array of hashes
        # to a flat list (Array() would turn a Hash into key/value pairs).
        links = [hit["link"]].flatten.compact
        links.detect { |l| l.is_a?(Hash) && l["type"] == "rxl" }
      end

      # Fetches the document at +url+. When it contains a <uri type="rxl">
      # element, the document that element points to is fetched instead and
      # all <uri> elements of the first document are inserted before its
      # first <docidentifier>.
      #
      # @param url [String] path relative to DOMAIN
      # @return [String] XML
      def fetch_bib_xml(url)
        rxl = get_rxl url
        uri_rxl = rxl.at("uri[@type='rxl']")
        return rxl.to_xml unless uri_rxl

        uri_xml = rxl.xpath("//uri").to_xml
        rxl = get_rxl uri_rxl.text
        docid = rxl.at "//docidentifier"
        # Without a <docidentifier> there is no anchor to insert before;
        # return the referenced document unchanged rather than raising.
        docid.add_previous_sibling(uri_xml) if docid
        rxl.to_xml
      end

      # Downloads DOMAIN + path and parses the response body as XML.
      #
      # @param path [String] path relative to DOMAIN
      # @return [Nokogiri::XML::Document]
      def get_rxl(path)
        Nokogiri::XML Faraday.get(DOMAIN + path).body
      end
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
relaton-calconnect-0.1.2 | lib/relaton_calconnect/scrapper.rb |
relaton-calconnect-0.1.1 | lib/relaton_calconnect/scrapper.rb |
relaton-calconnect-0.1.0 | lib/relaton_calconnect/scrapper.rb |