lib/relaton_iso/iso_bibliography.rb in relaton-iso-1.18.1 vs lib/relaton_iso/iso_bibliography.rb in relaton-iso-1.18.2

- old
+ new

@@ -4,208 +4,200 @@ require "relaton_iso/scrapper" require "relaton_iso/hit_collection" # require "relaton_iec" module RelatonIso - # Class methods for search ISO standards. - class IsoBibliography - class << self - # @param text [String] - # @return [RelatonIso::HitCollection] - def search(text) - HitCollection.new(text.gsub("\u2013", "-")).fetch - rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, - EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, - Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT, - Algolia::AlgoliaUnreachableHostError => e - raise RelatonBib::RequestError, e.message - end + # Methods for search ISO standards. + module IsoBibliography + extend self - # @param ref [String] the ISO standard Code to look up (e..g "ISO 9000") - # @param year [String, NilClass] the year the standard was published - # @param opts [Hash] options; restricted to :all_parts if all-parts - # @option opts [Boolean] :all_parts if all-parts reference is required - # @option opts [Boolean] :keep_year if undated reference should return - # actual reference with year - # - # @return [RelatonIsoBib::IsoBibliographicItem] Relaton XML serialisation of reference - def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize - code = ref.gsub("\u2013", "-") + # @param text [Pubid::Iso::Identifier, String] + # @return [RelatonIso::HitCollection] + def search(pubid, opts = {}) + pubid = Pubid::Iso::Identifier.parse(pubid) if pubid.is_a? String + HitCollection.new(pubid, opts).fetch + rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, + EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, + Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT, + Algolia::AlgoliaUnreachableHostError => e + raise RelatonBib::RequestError, e.message + end - # parse "all parts" request - code.sub! " (all parts)", "" - opts[:all_parts] ||= $~ && opts[:all_parts].nil? + # @param ref [String] the ISO standard Code to look up (e..g "ISO 9000") + # @param year [String, NilClass] the year the standard was published + # @param opts [Hash] options; restricted to :all_parts if all-parts + # @option opts [Boolean] :all_parts if all-parts reference is required + # @option opts [Boolean] :keep_year if undated reference should return + # actual reference with year + # + # @return [RelatonIsoBib::IsoBibliographicItem] Bibliographic item + def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize + code = ref.gsub("\u2013", "-") - query_pubid = Pubid::Iso::Identifier.parse(code) - query_pubid.year = year if year - query_pubid.part = nil if opts[:all_parts] - Util.warn "(#{query_pubid}) Fetching from iso.org ..." + # parse "all parts" request + code.sub! " (all parts)", "" + opts[:all_parts] ||= $~ && opts[:all_parts].nil? - hits, missed_year_ids = isobib_search_filter(query_pubid, opts) - tip_ids = look_up_with_any_types_stages(hits, ref, opts) + query_pubid = Pubid::Iso::Identifier.parse(code) + query_pubid.root.year = year.to_i if year&.respond_to?(:to_i) + Util.warn "(#{query_pubid}) Fetching from Relaton repository ..." - ret = if !opts[:all_parts] || hits.size == 1 - hits.any? && hits.first.fetch(opts[:lang]) - else - hits.to_all_parts(opts[:lang]) - end + hits, missed_year_ids = isobib_search_filter(query_pubid, opts) + tip_ids = look_up_with_any_types_stages(hits, ref, opts) + ret = hits.fetch_doc + return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret - return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret + response_pubid = ret.docidentifier.first.id # .sub(" (all parts)", "") + Util.warn "(#{query_pubid}) Found: `#{response_pubid}`" + get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts] + return ret if get_all - response_docid = ret.docidentifier.first.id.sub(" (all parts)", "") - response_pubid = Pubid::Iso::Identifier.parse(response_docid) + ret.to_most_recent_reference + rescue Pubid::Core::Errors::ParseError + Util.warn "(#{code}) Is not recognized as a standards identifier." + nil + end - Util.warn "(#{query_pubid}) Found: `#{response_pubid}`" + # @param query_pubid [Pubid::Iso::Identifier] + # @param pubid [Pubid::Iso::Identifier] + # @param all_parts [Boolean] match with any parts when true + # @return [Boolean] + def matches_parts?(query_pubid, pubid, all_parts: false) + # match only with documents with part number + return !pubid.part.nil? if all_parts - get_all = ( - (query_pubid.year && opts[:keep_year].nil?) || - opts[:keep_year] || - opts[:all_parts] - ) - return ret if get_all + query_pubid.part == pubid.part + end - ret.to_most_recent_reference - rescue Pubid::Core::Errors::ParseError - Util.warn "(#{code}) Is not recognized as a standards identifier." - nil - end + # + # Matches base of query_pubid and pubid. + # + # @param [Pubid::Iso::Identifier] query_pubid pubid to match + # @param [Pubid::Iso::Identifier] pubid pubid to match + # @param [Boolean] any_types_stages match with any types and stages + # + # @return [<Type>] <description> + # + def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics?PerceivedComplexity + return false unless pubid.respond_to?(:publisher) - # @param query_pubid [Pubid::Iso::Identifier] - # @param pubid [Pubid::Iso::Identifier] - # @param all_parts [Boolean] match with any parts when true - # @return [Boolean] - def matches_parts?(query_pubid, pubid, all_parts: false) - # match only with documents with part number - return !pubid.part.nil? if all_parts + query_pubid.publisher == pubid.publisher && + query_pubid.number == pubid.number && + query_pubid.copublisher == pubid.copublisher && + (any_types_stages || query_pubid.stage == pubid.stage) && + (any_types_stages || query_pubid.is_a?(pubid.class)) + end - query_pubid.part == pubid.part - end + # @param hit_collection [RelatonIso::HitCollection] + # @param year [String] + # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs + def filter_hits_by_year(hit_collection, year) + missed_year_ids = Set.new + return [hit_collection, missed_year_ids] if year.nil? - # - # Matches base of query_pubid and pubid. - # - # @param [Pubid::Iso::Identifier] query_pubid pubid to match - # @param [Pubid::Iso::Identifier] pubid pubid to match - # @param [Boolean] any_types_stages match with any types and stages - # - # @return [<Type>] <description> - # - def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics?PerceivedComplexity - return false unless pubid.respond_to?(:publisher) + # filter by year + hits = hit_collection.select do |hit| + hit.pubid.year ||= hit.hit[:year] + next true if check_year(year, hit) - query_pubid.publisher == pubid.publisher && - query_pubid.number == pubid.number && - query_pubid.copublisher == pubid.copublisher && - (any_types_stages || query_pubid.stage == pubid.stage) && - (any_types_stages || query_pubid.is_a?(pubid.class)) + missed_year_ids << hit.pubid.to_s if hit.pubid.year + false end - # @param hit_collection [RelatonIso::HitCollection] - # @param year [String] - # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs - def filter_hits_by_year(hit_collection, year) - missed_year_ids = Set.new - return [hit_collection, missed_year_ids] if year.nil? + [hits, missed_year_ids] + end - # filter by year - hits = hit_collection.select do |hit| - hit.pubid.year ||= hit.hit[:year] - next true if check_year(year, hit) + private - missed_year_ids << hit.pubid.to_s if hit.pubid.year - false - end + def check_year(year, hit) # rubocop:disable Metrics/AbcSize + (hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) || + (!hit.pubid.base.nil? && hit.pubid.base.year.to_s == year.to_s) || + (!hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) + end - [hits, missed_year_ids] + # @param pubid [Pubid::Iso::Identifier] PubID with no results + def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize + Util.warn "(#{pubid}) Not found." + + if missed_year_ids.any? + ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ") + Util.warn "(#{pubid}) TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}." end - private + if tip_ids.any? + ids = tip_ids.map { |i| "`#{i}`" }.join(", ") + Util.warn "(#{pubid}) TIP: Matches exist for #{ids}." + end - def check_year(year, hit) # rubocop:disable Metrics/AbcSize - (hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) || - (!hit.pubid.base.nil? && hit.pubid.base.year.to_s == year.to_s) || - (!hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) + if pubid.part + Util.warn "(#{pubid}) TIP: If it cannot be found, the document may no longer be published in parts." + else + Util.warn "(#{pubid}) TIP: If you wish to cite all document parts for the reference, " \ + "use `#{pubid.to_s(format: :ref_undated)} (all parts)`." end - # @param pubid [Pubid::Iso::Identifier] PubID with no results - def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize - Util.warn "(#{pubid}) Not found." + nil + end - if missed_year_ids.any? - ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ") - Util.warn "(#{pubid}) TIP: No match for edition year " \ - "#{pubid.year}, but matches exist for #{ids}." - end + def look_up_with_any_types_stages(hits, ref, opts) + return [] if hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/) - if tip_ids.any? - ids = tip_ids.map { |i| "`#{i}`" }.join(", ") - Util.warn "(#{pubid}) TIP: Matches exist for #{ids}." - end + ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO") + pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage) + resp, = isobib_search_filter(pubid, opts, any_types_stages: true) + resp.map &:pubid + end - if pubid.part - Util.warn "(#{pubid}) TIP: If it cannot be found, " \ - "the document may no longer be published in parts." - else - Util.warn "(#{pubid}) TIP: If you wish to cite " \ - "all document parts for the reference, use " \ - "`#{pubid.to_s(format: :ref_undated)} (all parts)`." - end + # + # Search for hits. If no found then trying missed stages. + # + # @param query_pubid [Pubid::Iso::Identifier] reference without correction + # @param opts [Hash] + # @param any_types_stages [Boolean] match with any stages + # + # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed years + # + def isobib_search_filter(query_pubid, opts, any_types_stages: false) + hit_collection = search(query_pubid, opts) - nil - end + # filter only matching hits + filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages + end - def look_up_with_any_types_stages(hits, ref, opts) # rubocop:disable Metrics/MethodLength - found_ids = [] - return found_ids if hits.from_gh || hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/) - - ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO") - pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage) - resp, = isobib_search_filter(pubid, opts, any_types_stages: true) - resp.map &:pubid + # + # Filter hits by query_pubid. + # + # @param hit_collection [RelatonIso::HitCollection] + # @param query_pubid [Pubid::Iso::Identifier] + # @param all_parts [Boolean] + # @param any_types_stages [Boolean] + # + # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs + # + def filter_hits(hit_collection, query_pubid, all_parts, any_types_stages) + # filter out + excludings = build_excludings(all_parts, any_types_stages) + no_year_ref = hit_collection.ref_pubid_no_year.exclude(*excludings) + result = hit_collection.select do |i| + pubid_match?(i.pubid, query_pubid, excludings, no_year_ref) && !(all_parts && i.pubid.part.nil?) end - # - # Search for hits. If no found then trying missed stages. - # - # @param query_pubid [Pubid::Iso::Identifier] reference without correction - # @param opts [Hash] - # @param any_types_stages [Boolean] match with any stages - # - # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed years - # - def isobib_search_filter(query_pubid, opts, any_types_stages: false) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity - query_pubid_without_year = query_pubid.dup - # remove year for query - query_pubid_without_year.year = nil - hit_collection = search(query_pubid_without_year.to_s) + filter_hits_by_year(result, query_pubid.year) + end - # filter only matching hits - filter_hits hit_collection, query_pubid, opts[:all_parts], - any_types_stages - end + def build_excludings(all_parts, any_types_stages) + excludings = %i[year edition] + excludings += %i[type stage iteration] if any_types_stages + excludings << :part if all_parts + excludings + end - # - # Filter hits by query_pubid. - # - # @param hit_collection [RelatonIso::HitCollection] - # @param query_pubid [Pubid::Iso::Identifier] - # @param all_parts [Boolean] - # @param any_stypes_tages [Boolean] - # - # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs - # - def filter_hits(hit_collection, query_pubid, all_parts, any_stypes_tages) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity - # filter out - result = hit_collection.select do |i| - hit_pubid = i.pubid - matches_base?(query_pubid, hit_pubid, - any_types_stages: any_stypes_tages) && - matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) && - query_pubid.corrigendums == hit_pubid.corrigendums && - query_pubid.amendments == hit_pubid.amendments - end - - filter_hits_by_year(result, query_pubid.year) + def pubid_match?(pubid, query_pubid, excludings, no_year_ref) + if pubid.is_a? String then pubid == query_pubid.to_s + else + pubid = pubid.dup + pubid.base = pubid.base.exclude(:year, :edition) if pubid.base + pubid.exclude(*excludings) == no_year_ref end end end end