require "relaton_nist/nist_bibliographic_item"
require "relaton_nist/document_relation"
require "relaton_nist/scrapper"
require "relaton_nist/hit_collection"
require "relaton_nist/xml_parser"
require "relaton_nist/comment_period"
require "relaton_nist/document_status"
require "relaton_nist/hash_converter"

module RelatonNist
  # Entry point for looking up NIST documents by reference.
  class NistBibliography
    class << self
      #
      # Search NIST documents by reference
      #
      # @param text [String] reference; a leading "NISTIR" is rewritten
      #   to "NIST IR" before searching
      # @param year [String, nil] year, handed unchanged to HitCollection.search
      # @param opts [Hash] options, handed unchanged to HitCollection.search
      #
      # @return [RelatonNist::HitCollection] search result
      #
      # @raise [RelatonBib::RequestError] when the request fails with an
      #   HTTP, socket or SSL error
      #
      def search(text, year = nil, opts = {})
        ref = text.sub(/^NISTIR/, "NIST IR")
        HitCollection.search ref, year, opts
      rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError => e
        raise RelatonBib::RequestError, e.message
      end

      #
      # Get NIST document by reference
      #
      # The reference may carry extra information which is split off here:
      # a parenthesised date ("(June 2020)" or "(June 15, 2020)"), a draft
      # stage ("(IPD)", "(2PD)", "(FPD)" or ".PD-x." inside the code) and a
      # trailing ":year" (only used when `year` is not given).
      # References ending in " EP" are reported as not found without searching.
      #
      # @param code [String] the NIST standard Code to look up (e.g. "8200")
      # @param year [String] the year the standard was published (optional)
      #
      # @param opts [Hash] options; the hash passed in is not modified
      # @option opts [Boolean] :all_parts restricted to all parts
      #   if all-parts reference is required
      #
      # @return [RelatonNist::NistBibliographicItem, nil] bibliographic item
      #
      def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
        return fetch_ref_err(code, year, []) if code.match?(/\sEP$/)

        # Work on a copy so the :date / :stage derived from this reference
        # do not leak into a hash the caller may reuse for another lookup.
        opts = opts.dup

        # The regexp literal must stay on the left of `=~`: that is what makes
        # Ruby assign the named captures to the locals code2, date2 and stage.
        /^(?<code2>[^(]+)(?:\((?<date2>\w+\s(?:\d{2},\s)?\d{4})\))?\s?\(?(?:(?<=\()(?<stage>(?:I|F|\d)PD))?/ =~ code
        stage ||= /(?<=\.)PD-\w+(?=\.)/.match(code)&.to_s
        if code2
          code = code2.strip
          if date2
            case date2
            when /\w+\s\d{4}/
              opts[:date] = Date.strptime date2, "%B %Y"
            when /\w+\s\d{2},\s\d{4}/
              opts[:date] = Date.strptime date2, "%B %d, %Y"
            end
          end
          opts[:stage] = stage if stage
        end

        if year.nil?
          # Split a "code:year" reference into its two components.
          /^(?<code1>[^:]+):(?<year1>[^:]+)$/ =~ code
          unless code1.nil?
            code = code1
            year = year1
          end
        end

        code += "-1" if opts[:all_parts]
        nistbib_get(code, year, opts)
      end

      private

      #
      # Get NIST document by reference
      #
      # @param [String] code reference
      # @param [String] year year
      # @param [Hash] opts options
      # @option opts [Date] :date date the document must carry
      # @option opts [String] :stage stage
      #
      # @return [RelatonNist::NistBibliographicItem, nil] bibliographic item
      #
      def nistbib_get(code, year, opts)
        result = nistbib_search_filter(code, year, opts)
        return unless result

        ret = nistbib_results_filter(result, year, opts)
        if ret[:ret]
          Util.warn "(#{code}) found `#{ret[:ret].docidentifier.first.id}`"
          ret[:ret]
        else
          fetch_ref_err(code, year, ret[:years])
        end
      end

      #
      # Go through the search hits, fetching them three at a time, and
      # return the first fetched item that
      # carries the requested date (if opts[:date] is set),
      # has the requested stage iteration (if opts[:stage] yields one), and
      # has a "published" date in the requested year (if year is given).
      # If no item matches, returns the published years that caused the
      # mismatch, for error reporting.
      #
      # @param result [RelatonNist::HitCollection] hits to examine
      # @param year [String, nil] year
      # @param opts [Hash] options
      # @option opts [Date] :date date the document must carry
      # @option opts [String] :stage stage
      #
      # @return [Hash] `{ ret: item }` on success, `{ years: [...] }` otherwise
      #
      def nistbib_results_filter(result, year, opts) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
        missed_years = []
        # "IPD" -> "I", "2PD" -> "2", "PD-3" -> "3"; nil when no stage is given
        iter = /\w+(?=PD)|(?<=PD-)\w+/.match(opts[:stage])&.to_s
        iteration = case iter
                    when "I" then "1"
                    when "F" then "final"
                    else iter
                    end
        result.each_slice(3) do |s| # fetch at most 3 hits concurrently
          fetch_pages(s, 3).each do |r|
            next if opts[:date] && r.date.none? { |d| d.on(:date) == opts[:date] }
            # An item without a status cannot match a requested iteration.
            next if iter && r.status&.iteration != iteration
            return { ret: r } unless year

            r.date.select { |d| d.type == "published" }.each do |d|
              return { ret: r } if year.to_i == d.on(:year)

              missed_years << d.on(:year)
            end
          end
        end
        { years: missed_years }
      end

      #
      # Fetch pages for all the hits in parallel
      #
      # @param hits [RelatonNist::HitCollection] hits
      # @param threads [Integer] number of threads
      #
      # @return [Array] bibliographic items, in the same order as the hits
      #
      def fetch_pages(hits, threads)
        workers = RelatonBib::WorkersPool.new threads
        workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
        hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
        workers.end
        # Workers may finish out of order; restore the original hit order.
        workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
      end

      #
      # Get search results and filter them by code and year
      #
      # @param code [String] reference
      # @param year [String, nil] year
      # @param opts [Hash] options
      # @option opts [String] :stage stage
      #
      # @return [RelatonNist::HitCollection] hits collection
      #
      def nistbib_search_filter(code, year, opts)
        Util.warn "(#{code}) fetching..."
        result = search(code, year, opts)
        result.search_filter
      end

      #
      # Outputs warning message if no match found
      #
      # @param [String] code reference
      # @param [String, nil] year year
      # @param [Array] missed_years missed years
      #
      # @return [nil] nil
      #
      def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
        id = year ? "#{code}:#{year}" : code
        Util.warn "WARNING: no match found online for `#{id}`. " \
                  "The code must be exactly like it is on the standards website."
        unless missed_years.empty?
          Util.warn "(There was no match for #{year}, though there " \
                    "were matches found for `#{missed_years.join('`, `')}`.)"
        end
        if /\d-\d/.match? code
          Util.warn "The provided document part may not exist, " \
                    "or the document may no longer be published in parts."
        end
        nil
      end
    end
  end
end