lib/relaton_cie/scrapper.rb in relaton-cie-1.14.0 vs lib/relaton_cie/scrapper.rb in relaton-cie-1.14.1
- old
+ new
@@ -1,14 +1,18 @@
module RelatonCie
module Scrapper
# Base URL under which the raw files of the relaton-data-cie repository are fetched.
# The new value drops the trailing "data/" segment present in the old one.
- ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-cie/master/data/".freeze
+ ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-cie/master/".freeze
# File name passed as `file:` to Relaton::Index.find_or_create in scrape_page.
+ INDEX_FILE = "index-v1.yaml".freeze
class << self
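# Fetch and parse the document for the given reference code.
#
# @param code [String] document reference
# @return [RelatonCie::BibliographicItem, nil] nil when the document is not found
# @raise [RelatonBib::RequestError] on an HTTP error other than 404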
def scrape_page(code)
# Old (1.14.0): the YAML URL is built from the code itself, replacing every
# slash, whitespace, hyphen, colon and dot with "_" and upcasing the result.
- url = "#{ENDPOINT}#{code.gsub(/[\/\s\-:.]/, '_').upcase}.yaml"
- parse_page url
# New (1.14.1): the file path is taken from an index row instead.
# NOTE(review): the archive name "index-v1.zip" is hard-coded here while the
# `file:` argument uses INDEX_FILE; confirm the two are meant to stay in sync.
+ index = Relaton::Index.find_or_create :cie, url: "#{ENDPOINT}index-v1.zip", file: INDEX_FILE
# Of the rows returned by the search, keep the one with the smallest :id.
+ row = index.search(code).min_by { |r| r[:id] }
# No row for this code: return nil without making a request for the document.
+ return unless row
+
+ parse_page "#{ENDPOINT}#{row[:file]}"
rescue OpenURI::HTTPError => e
# A 404 status yields nil; any other HTTP error is raised as a RequestError.
return if e.io.status.first == "404"
raise RelatonBib::RequestError, "No document found for #{code} reference. #{e.message}"
end