# https://github.com/AustinBlues/RSS-Speed-Reader/blob/master/lib/rss_speed_reader.rb
# https://github.com/tenderlove/nokogiri/pull/524

# Line-oriented parser for a TeamSite metadata tuple file.
#
# Each <data-tuple> element is read into a DigitalAsset; tuples that pass all
# validations are grouped by guid and then written to (or removed from) the
# database via DigitalAsset.
module Daengine::TeamsiteMetadataParser

  # Tuple field name => DigitalAsset attribute it is assigned to.
  @@translation = {
    'TeamSite/Metadata/web_title' => 'title',
    "TeamSite/Metadata/enterprise_last_updated_date"=> 'changed_at',
    "TeamSite/Metadata/enterprise_audience_id"=> 'audiences',
    "TeamSite/Metadata/enterprise_sami_desc"=> 'sami_code',
    "TeamSite/Metadata/enterprise_last_publication_date"=> 'published_at',
    "TeamSite/Metadata/enterprise_unpublish_date"=> 'unpublished_at',
    "TeamSite/Metadata/enterprise_expiration_date"=> 'expires_at',
    "TeamSite/Metadata/enterprise_guid" => 'guid',
    "TeamSite/Metadata/shortSynopsis" => 'summary',
    "TeamSite/Metadata/business_owner" => 'business_owner',
    "TeamSite/Metadata/enterprise_product_id" => 'product_ids'
  }

  # Tuple field name => key stored in the per-tuple document hash
  # (collected under 'documents_attributes').
  @@path_tuples = {
    "path" => 'path',
    "TeamSite/Metadata/enterprise_last_content_update_date" => 'doc_changed_at',
    "TeamSite/Metadata/enterprise_content_type_id" => 'content_type'
  }

  # Tuple field name => predicate on the field value. A falsy result marks the
  # whole tuple invalid, and it is discarded when the tuple closes.
  @@validations = {
    "TeamSite/Metadata/display_on_website" => lambda { |val| /Y|1/ =~ val },
    "TeamSite/Metadata/enterprise_expiration_date" => lambda { |val| Time.parse(val) > 1.minute.from_now },
    # "TeamSite/Metadata/enterprise_unpublish_date" => lambda {|val| val.blank? },
    "path" => lambda {|val| !(/\/manifest\// =~ val) }
  }

  @@logger = nil

  # Sets the logger used by .log.
  def self.logger=(some_logger)
    @@logger = some_logger
    self
  end

  # Sends +args+ to the configured logger at error level; does nothing when no
  # logger has been set.
  def self.log(args)
    @@logger.error(args) unless @@logger.blank?
  end

  # Reads tuples from +file+ (any object responding to #gets), then creates,
  # updates or deletes the matching DigitalAsset records, runs
  # DigitalAsset.purge!, and reports the outcome through Daengine.log.
  # The elapsed time is logged via .time.
  def self.parse_tuple_file(file)
    time do
      assets = collect_assets(file)
      counts, error_files = sync_assets(assets)

      # Only warn when something actually failed (an empty Array is truthy, so
      # testing the Array itself would log the header on every run).
      unless error_files.empty?
        log_txt = "TeamsiteMetadataParser: Failed to save/update following DigitalAssets in database:\n" +
                  error_files.map { |asset_file| "> #{asset_file}\n" }.join
        Daengine.log(log_txt, "warn")
      end

      DigitalAsset.purge! # if the purge criteria is met, purge anything not updated
      Daengine.log("TeamsiteMetadataParser: #{counts[:added]} records added, #{counts[:updated]} updated, #{counts[:deleted]} removed", "info")
    end
  end

  # Yields, then logs the wall-clock time the block took.
  def self.time
    start = Time.now
    yield
    self.log "elapsed time was #{Time.now - start}"
  end

  # Scans +file+ line by line and returns a Hash of guid => attribute Hash.
  # Each attribute Hash carries a 'documents_attributes' Array with one
  # document hash per valid tuple seen for that guid.
  def self.collect_assets(file)
    asset = nil
    assets = {}
    docpath = {}
    valid = true

    while (line = file.gets)
      case line
      when /<\/?data-tuple>/
        # The pattern matches both the opening and the closing tag; whether a
        # tuple is currently open decides which one this line is treated as.
        if asset.blank?
          asset = DigitalAsset.new
        else
          if valid
            assets[asset.guid] ||= asset.attributes # first tuple metadata wins
            assets[asset.guid]['documents_attributes'] ||= []
            assets[asset.guid]['documents_attributes'] << docpath
            # assets[asset.guid]['_id'] = asset.guid
          end
          asset = nil
          docpath = {}
          valid = true
        end
      # Two captures are required: the field name (looked up in the tables
      # above) and the field value.
      # NOTE(review): assumes fields look like
      # <tuple-field name="NAME">VALUE</tuple-field> -- confirm against a
      # real TeamSite export.
      when /<tuple-field name="([^"]+)">([^<]+)<\/tuple-field>/
        next unless valid

        # Copy the captures immediately so later regex matches cannot clobber them.
        field_name = Regexp.last_match(1)
        field_value = Regexp.last_match(2)

        validation = @@validations[field_name]
        valid = validation.call(field_value) if validation

        if @@path_tuples[field_name]
          docpath[@@path_tuples[field_name]] = field_value
        elsif @@translation[field_name]
          # if this is one of our keys, 'send' to the method
          attribute = @@translation[field_name]
          # Attributes whose current value responds to [] receive the value
          # split on commas; all others receive the raw string.
          val = asset.send(attribute).respond_to?(:[]) ? field_value.split(',') : field_value
          asset.send("#{attribute}=", val)
        end
      end
    end

    assets
  end
  private_class_method :collect_assets

  # Loops through each collected asset and either deletes it (when it has an
  # 'unpublished_at' value) or creates/updates it.
  # Returns [counts, error_files]: counts is a Hash with :added, :updated and
  # :deleted tallies; error_files is an Array of "guid, details" strings, one
  # per record that could not be saved or removed.
  def self.sync_assets(assets)
    counts = { :added => 0, :updated => 0, :deleted => 0 }
    error_files = []

    assets.each do |key, attributes|
      da = nil
      begin
        if attributes['unpublished_at'].blank?
          da = DigitalAsset.find_or_initialize_by(guid: key)
          creating = da.new?
          da.documents = []
          da.update_attributes!(attributes)
          counts[creating ? :added : :updated] += 1
        else
          DigitalAsset.where(guid: key).delete_all
          counts[:deleted] += 1
        end
      rescue StandardError => e
        # StandardError rather than Exception, so signals and SystemExit still
        # propagate. `da` is nil when the failure happened before a record was
        # loaded, so identify the entry by its key and fall back to the
        # exception message when there are no validation errors to report.
        details = da.try(:errors).try(:full_messages)
        details = e.message if details.blank?
        error_files << "#{key}, #{details}"
      end
    end

    [counts, error_files]
  end
  private_class_method :sync_assets
end