# coding: utf-8
require 'stringex'

module Mabmapper
  module MabXml
    # Wraps a MAB XML record. The raw XML is parsed with Nokogiri and all
    # namespaces are removed, so XPath queries can use bare element names.
    class Document
      include QueryHelper

      attr_accessor :xml

      # Parses +contents+ into a Nokogiri XML document and strips its
      # namespaces.
      #
      # @param contents the XML source handed to Nokogiri::XML
      def initialize(contents)
        @xml = Nokogiri::XML(contents)
        @xml.remove_namespaces!
      end

      #
      # Returns the contents of a MAB control field as an array.
      #
      # The text of the first <controlfield> whose tag attribute equals
      # +name+ is split into single characters; every '|' character is
      # represented as nil in the result.
      #
      # @param name the value of the controlfield's tag attribute
      # @return [Array<String, nil>] one entry per character, or an empty
      #   array when the field is missing or its text is blank
      #
      def controlfield(name)
        text = @xml.at_xpath("//controlfield[@tag='#{name}']").try(:text)
        return [] unless text.present?

        text.chars.map { |char| char == '|' ? nil : char }
      end

      # Normalize text based on the NACO rules.
      #
      # The argument itself is never modified; a new string is returned.
      #
      # @param value [String, nil] the text to normalize
      # @return [String, nil] the normalized text, or nil when +value+ is
      #   blank
      def naco_normalization(value)
        return unless value.present?

        # Make sure the German transliterations for umlauts and sharp s are
        # registered with Stringex (only stored when the backend does not
        # hold any translations yet).
        Stringex::Localization.backend = :internal
        Stringex::Localization.locale = :de
        Stringex::Localization.store_translations(:de, :transliterations, {"ü" => "ue", "ä" => "ae", "ö" => "oe", "ß" => "ss"}) if Stringex::Localization.backend.translations.blank?

        # Only non-destructive String methods are used below, so the caller's
        # string is left untouched and frozen strings are accepted.
        value
          .downcase                                    # Downcase everything
          .to_ascii                                    # Unicode/accented characters -> plain ASCII
          .gsub(/[,$~^%*\/?@.:;<>{}!\(\)\-]/, ' ')      # Convert special chars to spaces
          .gsub(/[\[\]\|]/, '')                        # Remove brackets and pipes entirely
          .strip                                       # Remove leading and trailing spaces
          .gsub(/\s+/, ' ')                            # Condense multiple spaces
      end
    end
  end
end