# NOTE(review): the line structure of this file appears to have been lost. Everything below sits on
# three physical lines. The first begins with `#`, so Ruby reads that whole line (module/class
# headers, the class-level entry points and the start of #initialize) as a comment; on the other two
# lines everything after their first `#` is a comment as well. As stored, the file cannot be loaded:
# restore the original line breaks before relying on it.
#
# NOTE(review): the text also looks truncated near the end. In #normalize_xsi the call `xml.sub!('`
# stops inside its first string literal, and what follows (` XML ) end ...`) resembles the tail of a
# heredoc-based method rather than the rest of that call. No definitions are visible here for several
# helpers that the visible code calls (for example normalize_version, normalize_empty_attributes,
# normalize_purl_location, regenerate_ng_xml and blank_ng_xml); presumably they lived in the missing
# span or come from the included Base module. Confirm against version control.
#
# What the visible text of Cocina::Models::Mapping::Normalizers::ModsNormalizer establishes:
#   .normalize(mods_ng_xml:, druid:, label:)
#       builds an instance and runs the full #normalize sequence, which ends with
#       remove_empty_elements(ng_xml.root) and returns ng_xml.
#   .normalize_purl(mods_ng_xml:, druid:)
#       runs only normalize_purl_location and returns ng_xml.
#   .normalize_purl_and_missing_title(mods_ng_xml:, druid:, label:)
#       runs normalize_purl_location, then TitleNormalizer.normalize_missing_title.
#   .normalize_identifier_type(mods_ng_xml:)
#       runs only normalize_identifier_type_attr; druid is passed as nil.
#   #initialize(mods_ng_xml:, druid:, label: nil)
#       works on a dup of the given document when it has a root, otherwise on blank_ng_xml, and
#       sets the document encoding to 'UTF-8'.
#   #remove_empty_elements(start_node)
#       removes a node that has no element children, blank text and no attributes (unless it is
#       named 'etal'), then re-checks that node's parent; otherwise recurses into element children.
#   #normalize_default_namespace
#       when the serialized XML lacks xmlns="http://www.loc.gov/mods/v3", adds that declaration to
#       the first 'mods:mods' occurrence and deletes every 'mods:' substring; regenerate_ng_xml is
#       called with the resulting string in either case.
# frozen_string_literal: true module Cocina module Models module Mapping module Normalizers # Normalizes a Fedora MODS document, accounting for differences between Fedora MODS and MODS generated from Cocina. # these adjustments have been approved by our metadata authority, Arcadia. class ModsNormalizer # rubocop:disable Metrics/ClassLength include Cocina::Models::Mapping::Normalizers::Base MODS_NS = Cocina::Models::Mapping::FromMods::Description::DESC_METADATA_NS XLINK_NS = Cocina::Models::Mapping::FromMods::Description::XLINK_NS # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized # @param [String] druid # @param [String] label # @return [Nokogiri::Document] normalized MODS def self.normalize(mods_ng_xml:, druid:, label:) new(mods_ng_xml: mods_ng_xml, druid: druid, label: label).normalize end # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized # @param [String] druid # @return [Nokogiri::Document] normalized MODS def self.normalize_purl(mods_ng_xml:, druid:) new(mods_ng_xml: mods_ng_xml, druid: druid).normalize_purl end # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized # @param [String] druid # @param [String] label # @return [Nokogiri::Document] normalized MODS def self.normalize_purl_and_missing_title(mods_ng_xml:, druid:, label:) new(mods_ng_xml: mods_ng_xml, druid: druid, label: label).normalize_purl_and_missing_title end # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized # @return [Nokogiri::Document] normalized MODS def self.normalize_identifier_type(mods_ng_xml:) new(mods_ng_xml: mods_ng_xml, druid: nil).normalize_identifier_type end def initialize(mods_ng_xml:, druid:, label: nil) @ng_xml = mods_ng_xml.root ? 
mods_ng_xml.dup : blank_ng_xml @ng_xml.encoding = 'UTF-8' @druid = druid @label = label end def normalize normalize_default_namespace normalize_xsi normalize_version normalize_empty_attributes normalize_authority_uris # must be called before OriginInfoNormalizer @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::OriginInfoNormalizer.normalize(mods_ng_xml: ng_xml) @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::SubjectNormalizer.normalize(mods_ng_xml: ng_xml) @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::NameNormalizer.normalize(mods_ng_xml: ng_xml) normalize_related_item_other_type normalize_unmatched_altrepgroup normalize_unmatched_nametitlegroup normalize_xml_space normalize_language_term_type normalize_access_condition normalize_identifier_type_attr normalize_location_physical_location normalize_purl_location normalize_empty_notes @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::TitleNormalizer.normalize(mods_ng_xml: ng_xml, label: label) @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::GeoExtensionNormalizer.normalize(mods_ng_xml: ng_xml, druid: druid) normalize_empty_type_of_resource # Must be after normalize_empty_attributes normalize_notes normalize_abstracts normalize_usage_primary normalize_related_item_attributes # This should be last-ish. 
normalize_empty_related_items remove_empty_elements(ng_xml.root) # this must be last ng_xml end def normalize_purl normalize_purl_location ng_xml end def normalize_purl_and_missing_title normalize_purl_location @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::TitleNormalizer.normalize_missing_title(mods_ng_xml: ng_xml, label: label) ng_xml end def normalize_identifier_type normalize_identifier_type_attr ng_xml end private attr_reader :ng_xml, :druid, :label # remove all empty elements that have no attributes and no children, recursively def remove_empty_elements(start_node) return unless start_node # remove node if there are no element children, there is no text value and there are no attributes if start_node.elements.empty? && start_node.text.blank? && start_node.attributes.empty? && start_node.name != 'etal' parent = start_node.parent start_node.remove remove_empty_elements(parent) # need to call again after child has been deleted else start_node.element_children.each { |e| remove_empty_elements(e) } end end def normalize_default_namespace xml = ng_xml.to_s unless xml.include?('xmlns="http://www.loc.gov/mods/v3"') xml.sub!('mods:mods', 'mods:mods xmlns="http://www.loc.gov/mods/v3"') xml.gsub!('mods:', '') end regenerate_ng_xml(xml) end def normalize_xsi return if ng_xml.namespaces.include?('xmlns:xsi') xml = ng_xml.to_s xml.sub!(' XML ) end end end end end end