require "mechanize"

module RelatonBipm
  #
  # Looks up BIPM bibliographic items. References starting with "Metrologia"
  # are scraped from the journal site under IOP_DOMAIN; every other reference
  # is fetched as a YAML document from GH_ENDPOINT.
  #
  # NOTE(review): this file was restored from a copy in which the line breaks
  # were lost and a number of expressions (mostly `Something.new`, `x.at(...)`
  # and `x.id`) were missing. They were rebuilt from the YARD `@return` tags
  # and from the argument shapes at the call sites. Confirm each restored
  # constructor against the project history before relying on it.
  #
  class BipmBibliography
    # NOTE(review): both base URLs are empty in the copy this was restored
    # from, so every request below is built against an empty prefix.
    # Presumably the real values were scrubbed - restore them from history.
    GH_ENDPOINT = "".freeze
    IOP_DOMAIN = "".freeze

    # French -> English document type names, applied to non-Metrologia
    # references before they are looked up.
    TRANSLATIONS = {
      "Déclaration" => "Declaration",
      "Réunion" => "Meeting",
      "Recommandation" => "Recommendation",
      "Résolution" => "Resolution",
      "Décision" => "Decision",
    }.freeze

    class << self
      #
      # Fetch a bibliographic item by reference. Progress is reported on
      # stderr via `warn`.
      #
      # @param text [String] reference, with or without the "BIPM " prefix
      # @param _year [String, nil] not used
      # @param _opts [Hash] not used
      #
      # @return [RelatonBipm::BipmBibliographicItem, nil] nil when nothing is
      #   found or the server answers 404
      #
      # @raise [RelatonBib::RequestError] on any HTTP error other than 404
      #
      def search(text, _year = nil, _opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
        warn "[relaton-bipm] (\"#{text}\") fetching..."
        ref = text.sub(/^BIPM\s/, "")
        item = ref.match?(/^Metrologia/i) ? get_metrologia(ref, magent) : get_bipm(ref, magent)
        unless item
          warn "[relaton-bipm] (\"#{text}\") not found."
          return
        end

        warn("[relaton-bipm] (\"#{text}\") found #{item.docidentifier[0].id}")
        # NOTE(review): the assigned value was missing; today's date matches
        # what `bibitem` is assumed to store in `fetched` - confirm.
        item.fetched = Date.today.to_s
        item
      rescue Mechanize::ResponseCodeError => e
        # A 404 is treated as "not found" and yields nil; anything else is
        # re-raised as a request error.
        raise RelatonBib::RequestError, e.message unless e.response_code == "404"
      end

      #
      # Build an HTTP agent that sends browser-like request headers.
      #
      # @return [Mechanize]
      #
      def magent # rubocop:disable Metrics/MethodLength
        agent = Mechanize.new
        agent.request_headers = {
          "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,"\
                      "image/avif,image/webp,image/apng,"\
                      "*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
          "Accept-Encoding" => "gzip, deflate, br",
          "Accept-Language" => "en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7",
          "Cache-Control" => "max-age=0",
          "Upgrade-Insecure-Requests" => "1",
        }
        # NOTE(review): the alias expression was missing. A random entry of
        # Mechanize's built-in alias table is a guess - confirm the intent.
        agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys.sample
        agent
      end

      #
      # Fetch a non-Metrologia document as YAML from GH_ENDPOINT.
      #
      # @param ref [String] reference without the "BIPM " prefix
      # @param agent [Mechanize]
      #
      # @return [RelatonBipm::BipmBibliographicItem, nil]
      #
      def get_bipm(ref, agent) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
        # Rewrite "<n> (<yyyy>)" / "<n> <yyyy>" / "<yyyy>" as "<yyyy>-<nn>",
        # zero-padding the number; a year already followed by "-" is skipped.
        rf = ref.sub(/(?:(\d{1,2})\s)?\(?(\d{4})(?!-)\)?/) do
          match = Regexp.last_match
          "#{match[2]}-#{match[1].to_s.rjust(2, '0')}"
        end
        rf.sub!("CCDS", "CCTF")
        TRANSLATIONS.each { |fr, en| rf.sub! fr, en }
        # NOTE(review): `path` is just the normalized reference here, so the
        # guard below can never fire. A lookup step that maps the reference to
        # a file path may be missing - confirm.
        path = rf
        return unless path

        url = "#{GH_ENDPOINT}#{path}"
        resp = agent.get url
        check_response resp
        return unless resp.code == "200"

        yaml = RelatonBib.parse_yaml resp.body, [Date]
        bib_hash = HashConverter.hash_to_bib yaml
        BipmBibliographicItem.new(**bib_hash)
      end

      #
      # Dispatch a Metrologia reference by its number of whitespace-separated
      # parts: journal, volume, issue or article.
      #
      # @param ref [String] "Metrologia [<vol> [<issue> [<start page>]]]"
      # @param agent [Mechanize]
      #
      # @return [RelatonBipm::BipmBibliographicItem, nil] nil when the
      #   reference has more than four parts
      #
      def get_metrologia(ref, agent)
        # Redirects are not followed so that `check_response` can see a 302
        # and report it as a CAPTCHA wall.
        agent.redirect_ok = false
        ref_arr = ref.split
        case ref_arr.size
        when 1 then get_journal agent
        when 2 then get_volume ref_arr[1], agent
        when 3 then get_issue(*ref_arr[1..2], agent)
        when 4 then get_article_from_issue(*ref_arr[1..3], agent)
        end
      end

      #
      # Fetch the journal page; every option of the volume selector becomes a
      # "partOf" relation.
      #
      # @param agent [Mechanize]
      # @return [RelatonBipm::BipmBibliographicItem]
      #
      def get_journal(agent)
        url = "#{IOP_DOMAIN}/journal/0026-1394"
        rsp = agent.get url
        check_response rsp
        rel = rsp.xpath('//select[@id="allVolumesSelector"]/option').map do |v|
          { type: "partOf", bibitem: journal_rel(v) }
        end
        did = doc_id []
        bibitem(formattedref: fref(did.id), docid: [did], link: blink(url), relation: rel)
      end

      #
      # Build the related item for one volume option of the journal page.
      #
      # @param elm [Nokogiri::XML::Element] <option> whose value is the volume path
      # @return [RelatonBipm::BipmBibliographicItem]
      #
      def journal_rel(elm)
        vol = elm[:value].split("/").last
        did = doc_id [vol]
        url = IOP_DOMAIN + elm[:value]
        BipmBibliographicItem.new(formattedref: fref(did.id), docid: [did], link: blink(url))
      end

      #
      # Fetch a volume page; every listed issue becomes a "partOf" relation.
      #
      # @param vol [String]
      # @param agent [Mechanize]
      # @return [RelatonBipm::BipmBibliographicItem]
      #
      def get_volume(vol, agent)
        url = "#{IOP_DOMAIN}/volume/0026-1394/#{vol}"
        rsp = agent.get url
        check_response rsp
        rel = rsp.xpath('//li[@itemprop="hasPart"]').map do |i|
          { type: "partOf", bibitem: volume_rel(i, vol) }
        end
        did = doc_id [vol]
        bibitem(formattedref: fref(did.id), docid: [did], link: blink(url), date: bdate(rsp),
                relation: rel, extent: btextent(vol), series: series)
      end

      #
      # Build the related item for one issue entry of a volume page.
      #
      # @param elm [Nokogiri::XML::Element] list item describing the issue
      # @param vol [String]
      # @return [RelatonBipm::BipmBibliographicItem]
      #
      def volume_rel(elm, vol) # rubocop:disable Metrics/AbcSize
        a = elm.at 'a[@itemprop="issueNumber"]'
        ish = a[:href].split("/").last
        url = IOP_DOMAIN + a[:href]
        docid = doc_id [vol, ish]
        # Use the issue's own title when the entry has a <p>; otherwise fall
        # back to a formatted reference built from the identifier.
        t = elm.at "p"
        title_fref = t ? { title: titles(t.text) } : { formattedref: fref(docid.id) }
        BipmBibliographicItem.new(**title_fref, docid: [docid], link: blink(url))
      end

      # @param title [String]
      # @return [RelatonBib::TypedTitleStringCollection]
      def titles(title)
        RelatonBib::TypedTitleString.from_string title, "en", "Latn"
      end

      #
      # Fetch an issue page; every listed article becomes a "partOf" relation.
      #
      # @param vol [String]
      # @param ish [String]
      # @param agent [Mechanize]
      # @return [RelatonBipm::BipmBibliographicItem]
      #
      def get_issue(vol, ish, agent) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
        url = issue_url vol, ish
        rsp = agent.get url
        check_response rsp
        rel = rsp.xpath('//div[@class="art-list-item-body"]').map do |a|
          { type: "partOf", bibitem: issue_rel(a, vol, ish) }
        end
        did = doc_id [vol, ish]
        title_fref = { title: issue_title(rsp) }
        # An untitled issue gets a formatted reference in addition to the
        # empty title.
        title_fref[:formattedref] = fref(did.id) unless title_fref[:title].any?
        bibitem(**title_fref, link: blink(url), relation: rel, docid: [did],
                date: bdate(rsp), extent: btextent(vol, ish), series: series)
      end

      # @param ref [String]
      # @return [RelatonBib::FormattedRef]
      def fref(ref)
        RelatonBib::FormattedRef.new content: ref, language: "en", script: "Latn"
      end

      #
      # Read the issue title, if the page has one.
      #
      # @param rsp [Mechanize::Page]
      # @return [RelatonBib::TypedTitleStringCollection, Array] empty Array
      #   when the page has no issue title
      #
      def issue_title(rsp)
        t = rsp.at('//div[@id="wd-jnl-issue-title"]/h4')
        return [] unless t

        titles(t.text)
      end

      # @param vol [String]
      # @param ish [String]
      # @return [String]
      def issue_url(vol, ish)
        "#{IOP_DOMAIN}/issue/0026-1394/#{vol}/#{ish}"
      end

      #
      # Build the related item for one article entry of an issue page.
      #
      # @param elm [Nokogiri::XML::Element]
      # @param vol [String]
      # @param ish [String]
      # @return [RelatonBipm::BipmBibliographicItem]
      #
      def issue_rel(elm, vol, ish)
        # The "indexer" div is matched against the requested start page in
        # `get_article_from_issue`, so its text is used as the article part
        # of the identifier.
        art = elm.at('div[@class="indexer"]').text
        ref = elm.at('div/a[@class="art-list-item-title"]')
        title = titles ref.text.strip
        docid = doc_id [vol, ish, art]
        link = blink IOP_DOMAIN + ref[:href]
        BipmBibliographicItem.new(title: title, docid: [docid], link: link)
      end

      # @param content [String]
      # @return [RelatonBib::TypedTitleString]
      def btitle(content)
        RelatonBib::TypedTitleString.new type: "main", content: content, language: "en", script: "Latn"
      end

      # @param url [String]
      # @return [Array<RelatonBib::TypedUri>] single "src" link
      def blink(url)
        [RelatonBib::TypedUri.new(type: "src", content: url)]
      end

      #
      # Read the publication date from a volume or issue page heading.
      #
      # @param rsp [Mechanize::Page]
      # @return [Array<RelatonBib::BibliographicDate>]
      #
      def bdate(rsp)
        # The date is whatever follows the last ", " of the heading text.
        date = rsp.at('//p[@itemprop="issueNumber"]|//h2[@itemprop="volumeNumber"]').text.split(", ").last
        # A bare four-digit year is kept as is; anything else is parsed and
        # reduced to year-month.
        on = date.match?(/^\d{4}$/) ? date : Date.parse(date).strftime("%Y-%m")
        [RelatonBib::BibliographicDate.new(type: "published", on: on)]
      end

      #
      # Build the primary identifier "Metrologia <parts joined by spaces>".
      #
      # @param args [Array<String>] volume, issue and start page, as available
      # @return [RelatonBib::DocumentIdentifier]
      #
      def doc_id(args)
        id = args.clone.unshift "Metrologia"
        RelatonBib::DocumentIdentifier.new(type: "BIPM", id: id.join(" "), primary: true)
      end

      #
      # Find the article with the given start page on an issue page and fetch
      # it. Warns with the list of available start pages when it is missing.
      #
      # @param vol [String]
      # @param ish [String]
      # @param art [String] start page of the article
      # @param agent [Mechanize]
      # @return [RelatonBipm::BipmBibliographicItem, nil]
      #
      def get_article_from_issue(vol, ish, art, agent) # rubocop:disable Metrics/MethodLength
        url = issue_url vol, ish
        rsp = agent.get url
        check_response rsp
        link = rsp.at("//div[@class='indexer'][.='#{art}']/../div/a")
        unless link
          arts = rsp.xpath("//div[@class='indexer']").map(&:text)
          warn "[relaton-bipm] No article is available at the specified start page \"#{art}\" in issue \"BIPM Metrologia #{vol} #{ish}\"."
          warn "[relaton-bipm] Available articles in the issue start at the following pages: (#{arts.join(', ')})"
          return
        end

        get_article link[:href], vol, ish, agent
      end

      #
      # Fetch an article page, follow its "BibTeX" link and build the item
      # from the first BibTeX entry.
      #
      # @param path [String]
      # @param vol [String]
      # @param ish [String]
      # @param agent [Mechanize]
      # @return [RelatonBipm::BipmBibliographicItem]
      #
      def get_article(path, vol, ish, agent) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
        rsp = agent.get path
        check_response rsp
        url = rsp.uri
        bib = rsp.link_with(text: "BibTeX").href
        rsp = agent.get bib
        check_response rsp
        bt = BibTeX.parse(rsp.body).first
        bibitem(
          docid: btdocid(bt), title: titles(bt.title.to_s), date: btdate(bt),
          abstract: btabstract(bt), doctype: bt.type.to_s, series: series,
          link: btlink(bt, url), contributor: btcontrib(bt),
          extent: btextent(vol, ish, bt.pages.to_s)
        )
      end

      #
      # Build an English/Latin-script article item; `args` may override any of
      # the defaults.
      #
      # @param args [Hash]
      # @return [RelatonBipm::BipmBibliographicItem]
      #
      def bibitem(**args)
        # NOTE(review): the `fetched` value was missing; today's date is an
        # assumption - confirm.
        BipmBibliographicItem.new(
          fetched: Date.today.to_s, type: "article", language: ["en"], script: ["Latn"], **args,
        )
      end

      # @return [Array<RelatonBib::Series>]
      def series
        [RelatonBib::Series.new(title: btitle("Metrologia"))]
      end

      #
      # Build the BIPM and DOI identifiers of an article.
      #
      # @param bibtex [BibTeX::Entry]
      # @return [Array<RelatonBib::DocumentIdentifier>]
      #
      def btdocid(bibtex)
        # Only the leading digits of the page range (the start page) are used.
        id = "#{bibtex.journal} #{bibtex.volume} #{bibtex.number} #{bibtex.pages.match(/^\d+/)}"
        [
          RelatonBib::DocumentIdentifier.new(type: "BIPM", id: id, primary: true),
          RelatonBib::DocumentIdentifier.new(type: "DOI", id: bibtex.doi),
        ]
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<RelatonBib::FormattedString>]
      def btabstract(bibtex)
        [RelatonBib::FormattedString.new(content: bibtex.abstract.to_s, language: "en", script: "Latn")]
      end

      # @param bibtex [BibTeX::Entry]
      # @param ref [URI]
      # @return [Array<RelatonBib::TypedUri>] "src" and "doi" links
      def btlink(bibtex, ref)
        [
          RelatonBib::TypedUri.new(type: "src", content: ref.to_s),
          RelatonBib::TypedUri.new(type: "doi", content: bibtex.url.to_s),
        ]
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<RelatonBib::BibliographicDate>]
      def btdate(bibtex)
        # NOTE(review): the year argument was missing; `bibtex.year.to_i` is
        # an assumption - confirm.
        on = Date.new(bibtex.year.to_i, bibtex.month_numeric)
        [RelatonBib::BibliographicDate.new(type: "published", on: on)]
      end

      #
      # Build the publisher and author contributors of an article.
      #
      # @param bibtex [BibTeX::Entry]
      # @return [Array<Hash>]
      #
      def btcontrib(bibtex)
        # NOTE(review): assumes the author field reads "Surname, Initials" -
        # confirm. `to_s` keeps a field without ", " from raising.
        surname, initial = bibtex.author.to_s.split(", ")
        initial = initial.to_s.split.map { |i| RelatonBib::LocalizedString.new i, "en", "Latn" }
        surname = RelatonBib::LocalizedString.new surname, "en", "Latn"
        name = RelatonBib::FullName.new surname: surname, initial: initial
        author = RelatonBib::Person.new name: name
        [
          { entity: { name: bibtex.publisher.to_s }, role: [{ type: "publisher" }] },
          { entity: author, role: [{ type: "author" }] },
        ]
      end

      #
      # Build the extent (volume, optionally issue and pages).
      #
      # @param vol [String] volume
      # @param ish [String, nil] issue
      # @param pgs [String, nil] pages, "<from>--<to>" for a range
      #
      # @return [Array<RelatonBib::BibItemLocality>]
      #
      def btextent(vol, ish = nil, pgs = nil)
        ext = [RelatonBib::BibItemLocality.new("volume", vol)]
        ext << RelatonBib::BibItemLocality.new("issue", ish) if ish
        ext << RelatonBib::BibItemLocality.new("page", *pgs.split("--")) if pgs
        ext
      end

      # @param ref [String] the BIPM standard Code to look up (e.g. "BIPM B-11")
      # @param year [String] not used
      # @param opts [Hash] not used
      # @return [RelatonBipm::BipmBibliographicItem]
      def get(ref, year = nil, opts = {})
        search(ref, year, opts)
      end

      private

      #
      # Check an HTTP response. Warn and raise an error if the response is a
      # redirect (treated as a CAPTCHA wall) or any other non-200 status.
      #
      # @param rsp [Mechanize::Page] response returned by `agent.get`
      #
      # @raise [RelatonBib::RequestError] if the response code is not "200"
      #
      def check_response(rsp) # rubocop:disable Metrics/AbcSize
        if rsp.code == "302"
          warn "[relaton-bipm] This source employs anti-DDoS measures that unfortunately affects automated requests."
          warn "[relaton-bipm] Please visit this link in your browser to resolve the CAPTCHA, then retry: #{rsp.uri}"
          raise RelatonBib::RequestError, "cannot access #{rsp.uri}"
        elsif rsp.code != "200"
          warn "[relaton-bipm] can't access #{rsp.uri} #{rsp.code}"
          raise RelatonBib::RequestError, "cannot access #{rsp.uri} #{rsp.code}"
        end
      end
    end
  end
end