lib/relaton_calconnect/scrapper.rb in relaton-calconnect-1.13.0 vs lib/relaton_calconnect/scrapper.rb in relaton-calconnect-1.13.1
- old
+ new
@@ -3,32 +3,43 @@
DOMAIN = "https://standards.calconnect.org/".freeze
SCHEME, HOST = DOMAIN.split(%r{:?/?/})
# DOMAIN = "http://127.0.0.1:4000/".freeze
class << self
- # papam hit [Hash]
- # @return [RelatonOgc::OrcBibliographicItem]
+ #
+ # Parse document page
+ #
+ # @param hit [Hash] document hash
+ #
+ # @return [RelatonCalconnect::CcBibliographicItem] bibliographic item
+ #
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
# Two paths: if the hit has a link whose "type" is "rxl", the item is obtained
# through fetch_bib_xml from that link's "content"; otherwise it is built
# directly from the hit hash.
# NOTE(review): `array` is defined outside this view; presumably it wraps a
# single value (or nil) into an Array -- confirm.
links = array(hit["link"])
# First link of type "rxl", or nil when there is none.
link = links.detect { |l| l["type"] == "rxl" }
if link
bib = fetch_bib_xml link["content"]
update_links bib, links
# XMLParser.from_xml bib_xml
else
# Remove the "fetched" key before the hash is converted.
# NOTE(review): presumably from_hash does not accept this key -- confirm.
+ hit.delete "fetched"
bib = RelatonCalconnect::CcBibliographicItem.from_hash doc_to_hash hit
end
# Links whose content has no host get the site's SCHEME and HOST merged in;
# links that already have a host are left unchanged.
bib.link.each do |l|
l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host
end
bib
end
private
- # @param url [String]
- # @return [String] XML
+ #
+ # Fetch bibliographic item from XML source
+ #
+ # @param url [String] URL to fetch
+ #
+ # @return [RelatonCalconnect::CcBibliographicItem] bibliographic item
+ #
def fetch_bib_xml(url) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
rxl = get_rxl url
uri_rxl = rxl.at("uri[@type='rxl']")
if uri_rxl
uri_xml = rxl.xpath("//uri").to_xml
@@ -59,10 +70,11 @@
array(doc["editorialgroup"]).each do |eg|
tc = eg.delete("technical_committee")
eg.merge!(tc) if tc
end
dtps = %w[CC CSD]
- array(doc["docid"]).detect { |id| dtps.include? id["type"].upcase }["primary"] = true
+ did = array(doc["docid"]).detect { |id| dtps.include? id["type"].upcase }
+ did["primary"] = true if did
doc
end
def update_links(bib, links)
links.each do |l|