require 'pathname'
require 'yaml'
require 'rdf/rdfxml'

module Hybag
  # Builds an ActiveFedora object from the contents of a bag.
  #
  # The model class name is read from the bag's fedora/RELS-EXT.rdf when that
  # file exists, falling back to the "model" key of the bag's hybag.yml.
  class Ingester
    attr_accessor :bag

    # @param bag the bag to ingest. This class calls bag_dir, tag_files,
    #   bag_files, manifest_files, bag_info_txt_file and bagit_txt_file on it.
    def initialize(bag)
      @bag = bag
    end

    # Instantiates the model named by the bag, assigns it a pid and attaches
    # the bag's tag files and payload files to it.
    #
    # @return the newly built object
    # @raise [RuntimeError] if no model name can be determined from the bag
    def ingest
      raise "Unable to determine model from bag" if model_name.blank?

      new_object = ActiveFedora.class_from_string(model_name.to_s).new
      # Assign a pid
      new_object.inner_object.pid = new_object.inner_object.assign_pid
      set_metadata_streams(new_object)
      set_file_streams(new_object)
      new_object
    end

    private

    # Attaches every non-generated tag file to the object as stripped text.
    # TODO: Add some sort of configuration to map bag filenames -> dsids.
    def set_metadata_streams(object)
      bag_tag_files.each do |tag_file|
        add_bag_file_to_object(object, tag_file, false)
      end
    end

    # Returns all registered tag files except those generated for the bag.
    # These include the bag_info.txt, bagit.txt, and manifest files.
    def bag_tag_files
      bag.tag_files - [bag.bag_info_txt_file] - bag.manifest_files - [bag.bagit_txt_file]
    end

    # Loads one bag file into the object.
    #
    # The file's basename without extension is compared case-insensitively
    # against the dsids of the object's datastreams. On a match the content is
    # written into that datastream (see #replace_subject); otherwise a new file
    # datastream is added using the basename as its dsid.
    #
    # @param object the object receiving the content
    # @param bag_file [String] path of the file inside the bag
    # @param binary [Boolean] when false, the content is passed through
    #   #transform_content before being stored
    def add_bag_file_to_object(object, bag_file, binary = true)
      label = bag_filename_to_label(bag_file)
      # Computed once instead of once per datastream inside the search below.
      wanted_dsid = label.downcase
      existing = object.datastreams.values.find { |ds| ds.dsid.downcase == wanted_dsid }

      # File.read closes the handle; File.open(...).read left it open.
      content = File.read(bag_file)
      content = transform_content(content) unless binary

      if existing
        replace_subject(content, existing)
      else
        object.add_file_datastream(content, :dsid => label)
      end
    end

    # Normalises non-binary content by stripping surrounding whitespace.
    def transform_content(content)
      content.strip
    end

    # Replaces the subject in RDF files with the datastream's rdf_subject.
    #
    # The content is always assigned to the datastream. If the datastream
    # responds to rdf_subject, its graph is then rebuilt with every statement
    # about the graph's first subject re-pointed at ds.rdf_subject.
    #
    # @return the datastream that was passed in
    # TODO: Deal with what happens when there's no defined datastream.
    def replace_subject(content, ds)
      ds.content = content
      return ds unless ds.respond_to?(:rdf_subject)

      # Assume the first subject in the metadata is about this object.
      # TODO: Move this to configuration?
      first_subject = ds.graph.first_subject
      rewritten = RDF::Repository.new
      ds.graph.each_statement do |statement|
        subject = statement.subject == first_subject ? ds.rdf_subject : statement.subject
        rewritten << [subject, statement.predicate, statement.object]
      end
      # The datastream's graph is swapped by writing its instance variable directly.
      ds.instance_variable_set(:@graph, rewritten)
      ds
    end

    # Attaches every payload file of the bag to the object as binary content.
    def set_file_streams(object)
      bag.bag_files.each do |bag_file|
        add_bag_file_to_object(object, bag_file)
      end
    end

    # Returns the basename of the given path with its extension removed.
    # TODO: Might consider decoration at some point.
    def bag_filename_to_label(bag_filename)
      Pathname.new(bag_filename).basename.sub_ext('').to_s
    end

    # Memoized model name: RELS-EXT first, then hybag.yml.
    def model_name
      # TODO: Add a default model_name configuration option?
      @model_name ||= extract_model_from_rels || extract_model_from_yaml
    end

    # Reads the model from fedora/RELS-EXT.rdf by loading it into a throwaway
    # object and asking ActiveFedora which models that object asserts.
    #
    # @return [String, nil] the first known model's name, or nil when the file
    #   is missing or no model is reported (so the YAML fallback can run)
    def extract_model_from_rels
      return unless File.exist?(fedora_rels)

      filler_object = ActiveFedora::Base.new
      rels_datastream = ActiveFedora::RelsExtDatastream.new
      rels_datastream.model = filler_object
      ActiveFedora::RelsExtDatastream.from_xml(File.read(fedora_rels).strip, rels_datastream)
      known_model = ActiveFedora::ContentModel.known_models_for(filler_object).first
      # nil.to_s would yield "", which is truthy and would block the YAML fallback.
      known_model.to_s if known_model
    end

    # Reads the "model" key from the bag's hybag.yml.
    #
    # @return the configured model, or nil when the file is missing, empty, or
    #   does not contain a mapping
    def extract_model_from_yaml
      return unless File.exist?(yaml_config)

      # NOTE(review): this file comes from the bag, i.e. external input. On
      # Psych versions where YAML.load is not safe by default it can
      # instantiate arbitrary objects - consider YAML.safe_load.
      conf = YAML.load(File.read(yaml_config))
      # An empty or scalar document does not parse to a Hash.
      conf['model'] if conf.is_a?(Hash)
    end

    # Path of the optional hybag.yml configuration file inside the bag.
    def yaml_config
      File.join(bag.bag_dir, "hybag.yml")
    end

    # Path of the optional RELS-EXT file inside the bag.
    def fedora_rels
      File.join(bag.bag_dir, "fedora", "RELS-EXT.rdf")
    end
  end
end