# https://github.com/AustinBlues/RSS-Speed-Reader/blob/master/lib/rss_speed_reader.rb
# https://github.com/tenderlove/nokogiri/pull/524
require 'mini_exiftool'

# Reads a line-oriented tuple file (<data-tuple> / <tuple-field> markup),
# builds DigitalAsset attribute hashes from it and then creates, updates or
# deletes the matching DigitalAsset records.
module Daengine::TeamsiteMetadataParser

  # Tuple field name => DigitalAsset attribute that receives its value.
  @@translation = {
    'TeamSite/Metadata/web_title' => 'title',
    "TeamSite/Metadata/enterprise_last_updated_date" => 'changed_at',
    "TeamSite/Metadata/enterprise_audience_id" => 'audiences',
    "TeamSite/Metadata/enterprise_sami_desc" => 'sami_code',
    "TeamSite/Metadata/enterprise_last_publication_date" => 'published_at',
    "TeamSite/Metadata/enterprise_unpublish_date" => 'unpublished_at',
    "TeamSite/Metadata/enterprise_expiration_date" => 'expires_at',
    "TeamSite/Metadata/enterprise_guid" => 'guid',
    "TeamSite/Metadata/shortSynopsis" => 'summary',
    "TeamSite/Metadata/business_owner" => 'business_owner',
    "TeamSite/Metadata/enterprise_product_id" => 'product_ids',
    "TeamSite/Metadata/enterprise_content_organization_id" => 'content_organization_ids',
    "TeamSite/Metadata/enterprise_program_id" => 'program_ids',
    "TeamSite/Metadata/omnitureSiteSection_codes" => 'omniture_codes'
  }

  # Tuple field name => key stored in the per-tuple document hash.
  @@path_tuples = {
    "path" => 'path',
    "TeamSite/Metadata/enterprise_last_content_update_date" => 'doc_changed_at',
    "TeamSite/Metadata/enterprise_content_type_id" => 'content_type'
  }

  # Tuple field name => predicate on the field value. A falsy result marks the
  # tuple currently being read as invalid, so it is dropped at its closing tag.
  @@validations = {
    "TeamSite/Metadata/display_on_website" => lambda { |val| /Y|1|true/ =~ val },
    "TeamSite/Metadata/enterprise_expiration_date" => lambda { |val| !val.blank? },
    # "TeamSite/Metadata/enterprise_unpublish_date" => lambda {|val| val.blank? },
    #"path" => lambda {|val| !(/\/manifest\// =~ val) }
  }

  @@logger = nil

  # Sets the logger used by .log.
  #
  # @param some_logger [#error] logger object
  def self.logger=(some_logger)
    @@logger = some_logger
    self
  end

  # Sends +args+ to the configured logger at error level; does nothing when no
  # logger has been set.
  def self.log(args)
    @@logger.error(args) unless @@logger.blank?
  end

  # Parses +file+ and synchronises DigitalAsset records with its content, then
  # calls DigitalAsset.purge! and logs the added/updated/removed counts.
  #
  # @param file [#gets] source read line by line until +gets+ returns nil
  # @param last_read [Time, nil] forwarded to .trim_package
  # @return whatever .time returns (the result of the elapsed-time log call)
  def self.parse_tuple_file(file, last_read = nil)
    time do
      assets = read_assets(file)
      counts, error_files = sync_assets(assets, last_read)

      unless error_files.empty?
        log_txt = "TeamsiteMetadataParser: Failed to save/update following DigitalAssets in database:\n"
        log_txt += error_files.map { |asset_file| "> #{asset_file}\n" }.join
        Daengine.log(log_txt, "warn")
      end

      DigitalAsset.purge! # if the purge criteria is met, purge anything not updated
      Daengine.log("TeamsiteMetadataParser: #{counts[:added]} records added, #{counts[:updated]} updated, #{counts[:deleted]} removed", "info")
    end
  end

  # Filters the documents of one asset and refreshes their file metadata.
  #
  # Documents whose path contains "/manifest/" are dropped, as are documents
  # whose file does not exist under the configured
  # +:digital_assets_file_directory+ (a warning is logged for those).
  # For each kept document, 'pages', 'size' and 'mime_type' are re-read via
  # MiniExiftool when the file was modified after +last_read+.
  #
  # @param asset_docs [Array<Hash>] document hashes, each with a 'path' key
  # @param last_read [Time, nil] time of the previous run; nil means there was
  #   no previous run, so metadata is read for every existing file
  # @return [Array<Hash>] the documents that were kept
  def self.trim_package(asset_docs, last_read = nil)
    docs = []
    path = Daengine.config[:digital_assets_file_directory]

    asset_docs.each do |doc|
      # exclude manifest_file
      next if doc['path'].match(%r{/manifest/})

      file = File.join(path, doc['path'])
      # exclude digital_asset_files that are not in *Teamsite Staging*
      unless File.exist?(file)
        # the file was missing on disk, show a warning!
        Daengine.log("TeamsiteMetadataParser: Unable to locate file #{file} on disk! Removing from metadata", "warn")
        next
      end

      docs << doc
      # Comparing a Time with nil raises ArgumentError, so a missing last_read
      # has to be handled before looking at the mtime.
      next unless last_read.nil? || File.mtime(file) > last_read

      begin
        exifdata = ::MiniExiftool.new file
        doc['pages'] = exifdata.pages # or exifdata['Slides']
        doc['size'] = exifdata.filesize
        doc['mime_type'] = exifdata.mimetype
      rescue StandardError => e
        # Best effort: a file whose metadata cannot be read is still kept.
        p "Error reading metadata from #{file} #{e.message}"
        Daengine.log "Error reading metadata from #{file} #{e.message}", "error"
      end
    end

    docs
  end

  # Runs the given block and logs how long it took.
  #
  # @return the result of the .log call (the block's own value is discarded)
  def self.time
    start = Time.now
    yield
    self.log "elapsed time was #{Time.now - start}"
  end

  # Reads every tuple from +file+ and returns a hash of guid => attribute hash.
  # Each attribute hash carries a 'documents_attributes' array holding one
  # document hash per valid tuple seen for that guid.
  def self.read_assets(file)
    asset = nil
    assets = {}
    docpath = {}
    valid = true

    while (line = file.gets)
      case line
      when %r{</?data-tuple>}
        if asset.blank?
          # opening tag: start collecting a new asset
          asset = DigitalAsset.new
        else
          # closing tag: keep the tuple only if every validation passed
          if valid
            record = (assets[asset.guid] ||= asset.attributes) # first tuple metadata wins
            (record['documents_attributes'] ||= []) << docpath
          end
          asset = nil
          docpath = {}
          valid = true
        end
      # NOTE(review): the field markup is assumed to be
      # <tuple-field name="NAME">VALUE</tuple-field>; the code below needs both
      # a name and a value capture - confirm against a real export.
      when %r{<tuple-field name="([^"]+)">([^<]+)</tuple-field>}
        next unless valid

        # Copy the captures right away so later matches cannot disturb them.
        name = Regexp.last_match(1)
        value = Regexp.last_match(2)

        check = @@validations[name]
        valid = check.call(value) if check

        if @@path_tuples[name]
          docpath[@@path_tuples[name]] = value
        elsif @@translation[name]
          # if this is one of our keys, 'send' to the method
          attribute = @@translation[name]
          # attributes whose current value is indexable receive a split list
          val = asset.send(attribute).respond_to?(:[]) ? value.split(',') : value
          asset.send("#{attribute}=", val)
        end
      end
    end

    assets
  end
  private_class_method :read_assets

  # Loops through each parsed asset and either replaces or deletes its
  # DigitalAsset record. Returns [counts, error_files], where counts has the
  # keys :added, :updated and :deleted and error_files holds one message per
  # asset whose processing raised.
  def self.sync_assets(assets, last_read)
    counts = { :added => 0, :updated => 0, :deleted => 0 }
    error_files = []

    assets.each_key do |key|
      da = nil
      attrs = assets[key]
      begin
        expires_at = attrs['expires_at']
        if !attrs['unpublished_at'].nil? || expires_at.nil? || expires_at < 1.minute.from_now
          # unpublished, or missing/elapsed expiration: remove the record
          DigitalAsset.where(guid: key).try :delete_all
          counts[:deleted] += 1
        else
          asset_docs = trim_package(attrs['documents_attributes'], last_read)
          da = DigitalAsset.find_or_initialize_by(guid: key)
          if asset_docs.empty?
            # no usable documents left, so the asset itself is removed
            DigitalAsset.where(guid: key).try :delete_all
            counts[:deleted] += 1
          else
            attrs['documents_attributes'] = asset_docs
            creating = da.new?
            da.documents = []
            da.update_attributes!(attrs)
            counts[creating ? :added : :updated] += 1
          end
        end
      rescue StandardError => e
        # One failing asset must not abort the rest of the import.
        error_files << "#{e} ---- #{da.try(:guid)}, #{da.try(:errors).try(:full_messages)}"
      end
    end

    [counts, error_files]
  end
  private_class_method :sync_assets
end