require "date"
require "fileutils"
require "yaml"
require_relative "registry"
require_relative "db_cache"

module Relaton
  class RelatonError < StandardError; end

  # Bibliographic database front-end. Looks references up in a read-only
  # static cache, then in the local and/or global caches, and finally asks
  # the flavor processor registered for the reference's prefix.
  class Db
    # @param global_cache [String, nil] directory of global DB (nil disables it)
    # @param local_cache [String, nil] directory of local DB (nil disables it)
    def initialize(global_cache, local_cache)
      @registry = Relaton::Registry.instance
      gpath = global_cache && File.expand_path(global_cache)
      @db = open_cache_biblio(gpath, type: :global)
      lpath = local_cache && File.expand_path(local_cache)
      @local_db = open_cache_biblio(lpath, type: :local)
      @static_db = open_cache_biblio File.expand_path("../relaton/static_cache", __dir__)
      @queues = {}
      @semaphore = Mutex.new
    end

    # Move the global or the local cache to another directory.
    # Does nothing (returns nil) when the selected cache is not configured
    # or when type is neither :global nor :local.
    #
    # @param new_dir [String, nil]
    # @param type: [Symbol] :global or :local
    # @return [String, nil]
    def mv(new_dir, type: :global)
      case type
      when :global
        @db&.mv new_dir
      when :local
        @local_db&.mv new_dir
      end
    end

    # Clear global and local databases (whichever are configured)
    def clear
      @db&.clear
      @local_db&.clear
    end

    ##
    # The class of reference requested is determined by the prefix of the code:
    # GB Standard for gbbib, IETF for ietfbib, ISO for isobib, IEC or IEV for
    # iecbib, etc.
    #
    # Returns nil when no processor recognises the code.
    #
    # @param code [String] the ISO standard Code to look up (e.g. "ISO 9000")
    # @param year [String] the year the standard was published (optional)
    #
    # @param opts [Hash] options
    # @option opts [Boolean] :all_parts If all-parts reference is required
    # @option opts [Boolean] :keep_year If undated reference should return
    #   actual reference with year
    # @option opts [Integer] :retries (1) Number of network retries
    #
    # @return [nil, RelatonBib::BibliographicItem,
    #   RelatonIsoBib::IsoBibliographicItem, RelatonItu::ItuBibliographicItem,
    #   RelatonIetf::IetfBibliographicItem, RelatonIec::IecBibliographicItem,
    #   RelatonIeee::IeeeBibliographicItem, RelatonNist::NistBibliographicItem,
    #   RelatonGb::GbbibliographicItem, RelatonOgc::OgcBibliographicItem,
    #   RelatonCalconnect::CcBibliographicItem, RelatonUn::UnBibliographicItem,
    #   RelatonBipm::BipmBibliographicItem, RelatonIho::IhoBibliographicItem,
    #   RelatonOmg::OmgBibliographicItem, RelatonW3c::W3cBibliographicItem]
    ##
    def fetch(code, year = nil, opts = {})
      stdclass = standard_class(code) || return
      processor = @registry.processors[stdclass]
      # Processors that understand URNs translate them to a plain code first.
      ref = if processor.respond_to?(:urn_to_code)
              processor.urn_to_code(code)&.first
            else
              code
            end
      ref ||= code
      result = combine_doc ref, year, opts, stdclass
      result ||= check_bibliocache(ref, year, opts, stdclass)
      result
    end

    # Same as #fetch but with the :fetch_db option set, which makes
    # #check_bibliocache answer from the caches only.
    # The caller's opts hash is not modified.
    #
    # @see Relaton::Db#fetch
    def fetch_db(code, year = nil, opts = {})
      fetch code, year, opts.merge(fetch_db: true)
    end

    # Fetch all standards from the static cache and from the global cache
    # (or the local cache when no global cache is configured).
    #
    # @param text [String, nil] text to search for
    # @param edition [String, nil] edition to filter by
    # @param year [Integer, nil] publication year to filter by
    # @return [Array]
    def fetch_all(text = nil, edition: nil, year: nil)
      result = @static_db.all do |file, yml|
        search_yml file, yml, text, edition, year
      end.compact
      db = @db || @local_db
      if db
        result += db.all do |file, xml|
          search_xml file, xml, text, edition, year
        end.compact
      end
      result
    end

    # Fetch asynchronously. The block is called with the fetched item, with
    # the RelatonBib::RequestError raised while fetching, or with nil when the
    # code has no recognised prefix.
    #
    # One queue and one workers pool are created lazily per standard class.
    # The pool size is taken from ENV["RELATON_FETCH_PARALLEL"] when set,
    # otherwise from the processor.
    #
    # @param code [String]
    # @param year [String, nil]
    # @param opts [Hash] same options as #fetch
    def fetch_async(code, year = nil, opts = {}, &block) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      stdclass = standard_class code
      if stdclass
        unless @queues[stdclass]
          processor = @registry.processors[stdclass]
          threads = ENV["RELATON_FETCH_PARALLEL"]&.to_i || processor.threads
          # args is [code, year, opts, block] as pushed onto the queue below
          wp = WorkersPool.new(threads) do |args|
            args[3].call fetch(*args[0..2])
          rescue RelatonBib::RequestError => e
            args[3].call e
          end
          @queues[stdclass] = { queue: Queue.new, workers_pool: wp }
          Thread.new { process_queue @queues[stdclass] }
        end
        @queues[stdclass][:queue] << [code, year, opts, block]
      else
        yield nil
      end
    end

    # Fetch using an explicitly given processor prefix; falls back to
    # detecting the standard class from the code when no processor has that
    # prefix.
    #
    # @param code [String]
    # @param year [String, NilClass]
    # @param stdclass [Symbol, NilClass] processor prefix to look for
    #
    # @param opts [Hash]
    # @option opts [Boolean] :all_parts If all-parts reference is required
    # @option opts [Boolean] :keep_year If undated reference should return
    #   actual reference with year
    # @option opts [Integer] :retries (1) Number of network retries
    #
    # @return [nil, RelatonBib::BibliographicItem,
    #   RelatonIsoBib::IsoBibliographicItem, RelatonItu::ItuBibliographicItem,
    #   RelatonIetf::IetfBibliographicItem, RelatonIec::IecBibliographicItem,
    #   RelatonIeee::IeeeBibliographicItem, RelatonNist::NistBibliographicItem,
    #   RelatonGb::GbbibliographicItem, RelatonOgc::OgcBibliographicItem,
    #   RelatonCalconnect::CcBibliographicItem, RelatonUn::UnBibliographicItem,
    #   RelatonBipm::BipmBibliographicItem, RelatonIho::IhoBibliographicItem,
    #   RelatonOmg::OmgBibliographicItem, RelatonW3c::W3cBibliographicItem]
    def fetch_std(code, year = nil, stdclass = nil, opts = {})
      std = nil
      @registry.processors.each do |name, processor|
        std = name if processor.prefix == stdclass
      end
      std = standard_class(code) or return nil unless std

      check_bibliocache(code, year, opts, std)
    end

    # The document identifier class corresponding to the given code
    # @param code [String]
    # @return [Array] [idtype, code]; idtype is nil for unrecognised codes
    def docid_type(code)
      stdclass = standard_class(code) or return [nil, code]
      _prefix, code = strip_id_wrapper(code, stdclass)
      [@registry.processors[stdclass].idtype, code]
    end

    # Read an entry, preferring the local cache over the global one.
    #
    # @param key [String]
    # @return [String, nil] nil when the key is in neither configured cache
    def load_entry(key)
      unless @local_db.nil?
        entry = @local_db[key]
        return entry if entry
      end
      # The global cache is optional too (see .init_bib_caches)
      @db[key] unless @db.nil?
    end

    # Write an entry to every configured cache.
    #
    # @param key [String]
    # @param value [String] Bibitem xml serialisation.
    def save_entry(key, value)
      @db.nil? || (@db[key] = value)
      @local_db.nil? || (@local_db[key] = value)
    end

    # List all entries of the local (or else global) cache as a serialization
    # @return [String, nil] nil when no cache is configured
    def to_xml
      db = @local_db || @db || return
      Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
        xml.documents do
          xml.parent.add_child db.all.join(" ")
        end
      end.to_xml
    end

    private

    # @param file [String] file path
    # @param yml [String] content in YAML format
    # @param text [String, nil] text to search
    # @param edition [String, nil] edition to filter
    # @param year [Integer, nil] year to filter
    # @return [BibliographicItem, nil]
    def search_yml(file, yml, text, edition, year)
      item = search_edition_year(file, yml, edition, year)
      return unless item

      # YAML entries are matched against their XML rendering
      item if match_xml_text(item.to_xml(bibdata: true), text)
    end

    # @param file [String] file path
    # @param xml [String] content in XML format
    # @param text [String, nil] text to search
    # @param edition [String, nil] edition to filter
    # @param year [Integer, nil] year to filter
    # @return [BibliographicItem, nil]
    def search_xml(file, xml, text, edition, year)
      return unless text.nil? || match_xml_text(xml, text)

      search_edition_year(file, xml, edition, year)
    end

    # Parse the content with the processor selected by the name of the
    # directory containing the file, then apply the edition/year filters.
    #
    # @param file [String] file path
    # @param content [String] content in XML or YAML format
    # @param edition [String, nil] edition to filter
    # @param year [Integer, nil] year to filter
    # @return [BibliographicItem, nil]
    def search_edition_year(file, content, edition, year) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
      processor = @registry.processors[standard_class(file.split("/")[-2])]
      item = if file.match?(/xml$/)
               processor.from_xml(content)
             else
               processor.hash_to_bib(YAML.safe_load(content))
             end
      item if (edition.nil? || item.edition == edition) && (year.nil? ||
        item.date.detect { |d| d.type == "published" && d.on(:year).to_s == year.to_s })
    end

    # Check whether the text occurs inside an attribute value or inside
    # element content of the XML.
    #
    # The pattern opens on =' or =" (attribute) or on > (element content) and
    # the conditional groups require the matching closer: the same quote for
    # an attribute, < for element content. The text is interpolated into the
    # pattern as-is (not escaped), case-insensitively.
    #
    # @param xml [String] content in XML format
    # @param text [String, nil] text to search
    # @return [Boolean]
    def match_xml_text(xml, text)
      %r{((?<attr>=((?<apstr>')|"))|>).*?#{text}.*?(?(<attr>)(?(<apstr>)'|")|<)}mi.match?(xml)
    end

    # Build a combined document for references of the form "A + B"
    # (relation type "derivedFrom") or "A, B" (relation type "complements",
    # described as "amendment"). Returns nil for any other code.
    #
    # @param code [String]
    # @param year [String, nil]
    # @param stdclass [Symbol]
    #
    # @param opts [Hash] options
    # @option opts [Boolean] :all_parts If all-parts reference is required
    # @option opts [Boolean] :keep_year If undated reference should return
    #   actual reference with year
    # @option opts [Integer] :retries (1) Number of network retries
    #
    # @return [nil, RelatonBib::BibliographicItem,
    #   RelatonIsoBib::IsoBibliographicItem, RelatonItu::ItuBibliographicItem,
    #   RelatonIetf::IetfBibliographicItem, RelatonIec::IecBibliographicItem,
    #   RelatonIeee::IeeeBibliographicItem, RelatonNist::NistBibliographicItem,
    #   RelatonGb::GbbibliographicItem, RelatonOgc::OgcBibliographicItem,
    #   RelatonCalconnect::CcBibliographicItem, RelatonUn::UnBibliographicItem,
    #   RelatonBipm::BipmBibliographicItem, RelatonIho::IhoBibliographicItem,
    #   RelatonOmg::OmgBibliographicItem, RelatonW3c::W3cBibliographicItem]
    def combine_doc(code, year, opts, stdclass) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
      if (refs = code.split " + ").size > 1
        reltype = "derivedFrom"
        reldesc = nil
      elsif (refs = code.split ", ").size > 1
        reltype = "complements"
        reldesc = RelatonBib::FormattedString.new content: "amendment"
      else
        return
      end

      doc = @registry.processors[stdclass].hash_to_bib docid: { id: code }
      ref = refs[0]
      updates = check_bibliocache(ref, year, opts, stdclass)
      if updates
        doc.relation << RelatonBib::DocumentRelation.new(bibitem: updates, type: "updates")
      end
      # Each further part is looked up as "<first ref><divider><part>"
      divider = stdclass == :relaton_itu ? " " : "/"
      refs[1..-1].each_with_object(doc) do |c, d|
        bib = check_bibliocache(ref + divider + c, year, opts, stdclass)
        if bib
          d.relation << RelatonBib::DocumentRelation.new(
            type: reltype, description: reldesc, bibitem: bib,
          )
        end
      end
    end

    # Find the processor whose prefix (optionally preceded by "urn:") or
    # default prefix matches the code. Logs a message when none matches.
    #
    # @param code [String] code of standard
    # @return [Symbol, nil] standard class name, nil if not recognised
    def standard_class(code)
      @registry.processors.each do |name, processor|
        return name if /^(urn:)?#{processor.prefix}/i.match?(code) ||
          processor.defaultprefix.match(code)
      end
      Util.log <<~WARN, :info
        [relaton] #{code} does not have a recognised prefix
      WARN
      # Callers test the result for truthiness; do not leak the logger's
      # return value.
      nil
    end

    # TODO: i18n
    # Format ID
    # @param code [String]
    # @param year [String]
    #
    # @param opts [Hash]
    # @option opts [Boolean] :all_parts If all-parts reference is required
    # @option opts [Boolean] :keep_year If undated reference should return
    #   actual reference with year
    # @option opts [Integer] :retries (1) Number of network retries
    #
    # @param stdclass [Symbol]
    # @return [Array<String>] docid ("PREFIX(code[:year][ (all parts)])")
    #   and the cleaned code
    def std_id(code, year, opts, stdclass)
      prefix, code = strip_id_wrapper(code, stdclass)
      ret = code
      # GB identifiers separate the year with "-", all others with ":"
      ret += (stdclass == :relaton_gb ? "-" : ":") + year if year
      ret += " (all parts)" if opts[:all_parts]
      ["#{prefix}(#{ret.strip})", code]
    end

    # Find prefix and clean code: the first en dash is replaced by a hyphen
    # and a surrounding "PREFIX(...)" wrapper is removed.
    #
    # @param code [String]
    # @param stdclass [Symbol]
    # @return [Array<String>] prefix and cleaned code
    def strip_id_wrapper(code, stdclass)
      prefix = @registry.processors[stdclass].prefix
      code = code.sub(/\u2013/, "-").sub(/^#{prefix}\((.+)\)$/, "\\1")
      [prefix, code]
    end

    # Convert a cache entry to a bibliographic item.
    #
    # @param entry [String, nil] XML string or a "not_found ..." marker
    # @param stdclass [Symbol]
    # @return [nil, RelatonBib::BibliographicItem,
    #   RelatonIsoBib::IsoBibliographicItem, RelatonItu::ItuBibliographicItem,
    #   RelatonIetf::IetfBibliographicItem, RelatonIec::IecBibliographicItem,
    #   RelatonIeee::IeeeBibliographicItem, RelatonNist::NistBibliographicItem,
    #   RelatonGb::GbbibliographicItem, RelatonOgc::OgcBibliographicItem,
    #   RelatonCalconnect::CcBibliographicItem, RelatonUn::UnBibliographicItem,
    #   RelatonBipm::BipmBibliographicItem, RelatonIho::IhoBibliographicItem,
    #   RelatonOmg::OmgBibliographicItem, RelatonW3c::W3cBibliographicItem]
    def bib_retval(entry, stdclass)
      if entry.nil? || entry.match?(/^not_found/)
        nil
      else
        @registry.processors[stdclass].from_xml(entry)
      end
    end

    # Look the reference up in the static cache, then in the local/global
    # caches, fetching it through the processor when it is missing (unless
    # opts[:fetch_db] is set).
    #
    # @param code [String]
    # @param year [String]
    #
    # @param opts [Hash]
    # @option opts [Boolean] :all_parts If all-parts reference is required
    # @option opts [Boolean] :keep_year If undated reference should return
    #   actual reference with year
    # @option opts [Integer] :retries (1) Number of network retries
    # @option opts [Boolean] :fetch_db Only read the caches
    #
    # @param stdclass [Symbol]
    # @return [nil, RelatonBib::BibliographicItem,
    #   RelatonIsoBib::IsoBibliographicItem, RelatonItu::ItuBibliographicItem,
    #   RelatonIetf::IetfBibliographicItem, RelatonIec::IecBibliographicItem,
    #   RelatonIeee::IeeeBibliographicItem, RelatonNist::NistBibliographicItem,
    #   RelatonGb::GbbibliographicItem, RelatonOgc::OgcBibliographicItem,
    #   RelatonCalconnect::CcBibliographicItem, RelatonUn::UnBibliographicItem,
    #   RelatonBipm::BipmBibliographicItem, RelatonIho::IhoBibliographicItem,
    #   RelatonOmg::OmgBibliographicItem, RelatonW3c::W3cBibliographicItem]
    def check_bibliocache(code, year, opts, stdclass) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
      id, searchcode = std_id(code, year, opts, stdclass)
      yaml = @static_db[id]
      if yaml
        return @registry.processors[stdclass].hash_to_bib YAML.safe_load(yaml)
      end

      # db is the primary cache; altdb is the global cache when both exist
      db = @local_db || @db
      altdb = @local_db && @db ? @db : nil
      if db.nil?
        return if opts[:fetch_db]

        bibentry = new_bib_entry(searchcode, year, opts, stdclass, db: db, id: id)
        return bib_retval(bibentry, stdclass)
      end

      @semaphore.synchronize do
        db.delete(id) unless db.valid_entry?(id, year)
      end
      if altdb
        return bib_retval(altdb[id], stdclass) if opts[:fetch_db]

        @semaphore.synchronize do
          db.clone_entry id, altdb if altdb.valid_entry? id, year
        end
        # The network fetch happens outside the lock
        entry = new_bib_entry(searchcode, year, opts, stdclass, db: db, id: id) unless db[id]
        @semaphore.synchronize do
          db[id] ||= entry
          altdb.clone_entry(id, db) if !altdb.valid_entry?(id, year)
        end
      else
        return bib_retval(db[id], stdclass) if opts[:fetch_db]

        db[id] ||= new_bib_entry(searchcode, year, opts, stdclass, db: db, id: id)
      end
      bib_retval(db[id], stdclass)
    end

    # Fetch the reference through the processor and serialise it for the
    # cache.
    #
    # @param code [String]
    # @param year [String]
    #
    # @param opts [Hash]
    # @option opts [Boolean] :all_parts If all-parts reference is required
    # @option opts [Boolean] :keep_year If undated reference should return
    #   actual reference with year
    # @option opts [Integer] :retries (1) Number of network retries
    #
    # @param stdclass [Symbol]
    # @param db [Relaton::DbCache, NilClass]
    # @param id [String] docid
    # @return [String] XML, "not_found ..." or "redirection <docid>"
    def new_bib_entry(code, year, opts, stdclass, **args) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
      bib = net_retry(code, year, opts, stdclass, opts.fetch(:retries, 1))
      bib_id = bib&.docidentifier&.first&.id

      # when docid doesn't match bib's id then return a reference to bib's id
      if args[:db] && args[:id] && bib_id && args[:id] !~ %r{#{Regexp.quote("(#{bib_id})")}}
        bid = std_id(bib.docidentifier.first.id, nil, {}, stdclass).first
        @semaphore.synchronize { args[:db][bid] ||= bib_entry bib }
        "redirection #{bid}"
      else
        bib_entry bib
      end
    end

    # Call the processor, retrying on RelatonBib::RequestError until the
    # given number of attempts is used up.
    #
    # @raise [RelatonBib::RequestError]
    def net_retry(code, year, opts, stdclass, retries)
      @registry.processors[stdclass].get(code, year, opts)
    rescue RelatonBib::RequestError => e
      raise e unless retries > 1

      net_retry(code, year, opts, stdclass, retries - 1)
    end

    # @param bib [RelatonBib::BibliographicItem,
    #   RelatonIsoBib::IsoBibliographicItem, RelatonItu::ItuBibliographicItem,
    #   RelatonIetf::IetfBibliographicItem, RelatonIec::IecBibliographicItem,
    #   RelatonIeee::IeeeBibliographicItem, RelatonNist::NistBibliographicItem,
    #   RelatonGb::GbbibliographicItem, RelatonOgc::OgcBibliographicItem,
    #   RelatonCalconnect::CcBibliographicItem, RelatonUn::UnBibliographicItem,
    #   RelatonBipm::BipmBibliographicItem, RelatonIho::IhoBibliographicItem,
    #   RelatonOmg::OmgBibliographicItem, RelatonW3c::W3cBibliographicItem]
    # @return [String] XML or "not_found yyyy-mm-dd"
    def bib_entry(bib)
      if bib.respond_to? :to_xml
        bib.to_xml(bibdata: true)
      else
        "not_found #{Date.today}"
      end
    end

    # Open a cache directory. For non-static caches every sub-directory
    # whose version check fails is removed and a warning is logged.
    #
    # @param dir [String, nil] DB directory
    # @param type [Symbol] :static (YAML files) or :global / :local (XML files)
    # @return [Relaton::DbCache, NilClass]
    def open_cache_biblio(dir, type: :static) # rubocop:disable Metrics/MethodLength
      return nil if dir.nil?

      db = DbCache.new dir, type == :static ? "yml" : "xml"
      return db if type == :static

      Dir["#{dir}/*/"].each do |fdir|
        next if db.check_version?(fdir)

        FileUtils.rm_rf(fdir, secure: true)
        Util.log(
          "[relaton] WARNING: cache #{fdir}: version is obsolete and cache is "\
          "cleared.", :warning
        )
      end
      db
    end

    # Move queued references to the workers pool until a falsy value is
    # popped from the queue.
    #
    # @param qwp [Hash]
    # @option qwp [Queue] :queue The queue of references to fetch
    # @option qwp [Relaton::WorkersPool] :workers_pool The pool of workers
    def process_queue(qwp)
      while (args = qwp[:queue].pop)
        qwp[:workers_pool] << args
      end
    end

    class << self
      # Initialise and return relaton instance, with local and global cache
      # names.
      #
      # @param opts [Hash]
      # @option opts [String, nil] :local_cache local cache name; "relaton"
      #   is used when nil or empty
      # @option opts [Boolean] :global_cache whether to use the global cache
      # @option opts [Boolean] :flush_caches remove the cache directories
      #   before opening them
      # @return [Relaton::Db]
      def init_bib_caches(**opts) # rubocop:disable Metrics/CyclomaticComplexity
        globalname = global_bibliocache_name if opts[:global_cache]
        localname = local_bibliocache_name(opts[:local_cache])
        flush_caches globalname, localname if opts[:flush_caches]
        Relaton::Db.new(globalname, localname)
      end

      private

      # Remove the given cache directories; nil names are skipped.
      def flush_caches(gcache, lcache)
        FileUtils.rm_rf gcache unless gcache.nil?
        FileUtils.rm_rf lcache unless lcache.nil?
      end

      # @return [String] path of the global cache under the home directory
      def global_bibliocache_name
        "#{Dir.home}/.relaton/cache"
      end

      # @param cachename [String, nil]
      # @return [String] "<cachename>/cache", with "relaton" as the default
      def local_bibliocache_name(cachename)
        cachename = "relaton" if cachename.nil? || cachename.empty?
        "#{cachename}/cache"
      end
    end
  end
end