lib/hybag/ingester.rb in hybag-0.0.7 vs lib/hybag/ingester.rb in hybag-0.0.8
- old
+ new
@@ -9,69 +9,75 @@
def ingest
raise "Unable to determine model from bag" if model_name.blank?
new_object = ActiveFedora.class_from_string(model_name.to_s).new
# Assign a pid
- new_object.inner_object.pid = ActiveFedora::Base.assign_pid(new_object)
+ new_object.inner_object.pid = new_object.inner_object.assign_pid
set_metadata_streams(new_object)
set_file_streams(new_object)
return new_object
end
private
- # TODO: What to do if the bag has files that don't have model definitions?
# TODO: Add some sort of configuration to map bag filenames -> dsids.
def set_metadata_streams(object)
- object.metadata_streams.each do |ds|
- if bag_has_metastream?(ds.dsid)
- ds.content = bag_metastream(ds.dsid).read.strip
- # Assume the first subject in the metadata is about this object.
- # TODO: Move this to configuration?
- first_subject = ds.graph.first_subject
- new_repository = RDF::Repository.new
- ds.graph.each_statement do |statement|
- subject = statement.subject
- subject = ds.rdf_subject if subject == first_subject
- new_repository << [subject, statement.predicate, statement.object]
- end
- ds.instance_variable_set(:@graph,new_repository)
- end
+ bag_tag_files.each do |tag_file|
+ add_bag_file_to_object(object, tag_file, false)
end
end
- def set_file_streams(object)
- file_streams = object.datastreams.select{|k, ds| !ds.metadata?}.values
- file_streams.each do |ds|
- if bag_has_datastream?(ds.dsid)
- ds.content = bag_datastream(ds.dsid).read
- end
- end
+
+ # Returns all registered tag files except those generated for the bag
+ # These includes the bag_info.txt, bagit.txt, and manifest files.
+ def bag_tag_files
+ bag.tag_files - [bag.bag_info_txt_file] - bag.manifest_files - [bag.bagit_txt_file]
end
- # TODO: Might consider decoration at some point.
- def bag_filename_to_label(bag_filename)
- Pathname.new(bag_filename).basename.sub_ext('').to_s
+ def add_bag_file_to_object(object, bag_file, binary=true)
+ parsed_name = bag_filename_to_label(bag_file)
+ found_datastream = object.datastreams.values.find{|x| x.dsid.downcase == bag_filename_to_label(bag_file).downcase}
+ content = File.open(bag_file).read
+ content = transform_content(content) unless binary
+ if found_datastream
+ found_datastream = replace_subject(content, found_datastream)
+ else
+ object.add_file_datastream(content, :dsid => parsed_name)
+ end
end
- def bag_has_datastream?(label)
- bag.bag_files.any?{|x| bag_filename_to_label(x) == label}
+ def transform_content(content)
+ content = content.strip
end
- def bag_datastream(label)
- bag_file = bag.bag_files.select{|x| bag_filename_to_label(x) == label}.first
- result = File.open(bag_file) unless bag_file.blank?
- return result
+ # Replaces the subject in RDF files with the datastream's rdf_subject.
+ # TODO: Deal with what happens when there's no defined datastream.
+ def replace_subject(content, ds)
+ ds.content = content
+ if ds.respond_to?(:rdf_subject)
+ # Assume the first subject in the metadata is about this object.
+ # TODO: Move this to configuration?
+ first_subject = ds.graph.first_subject
+ new_repository = RDF::Repository.new
+ ds.graph.each_statement do |statement|
+ subject = statement.subject
+ subject = ds.rdf_subject if subject == first_subject
+ new_repository << [subject, statement.predicate, statement.object]
+ end
+ ds.instance_variable_set(:@graph,new_repository)
+ end
+ return ds
end
- def bag_has_metastream?(label)
- bag.tag_files.any?{|x| bag_filename_to_label(x) == label}
+ def set_file_streams(object)
+ bag.bag_files.each do |bag_file|
+ add_bag_file_to_object(object, bag_file)
+ end
end
- def bag_metastream(label)
- tag_file = bag.tag_files.select{|x| bag_filename_to_label(x) == label}.first
- result = File.open(tag_file) unless tag_file.blank?
- return result
+ # TODO: Might consider decoration at some point.
+ def bag_filename_to_label(bag_filename)
+ Pathname.new(bag_filename).basename.sub_ext('').to_s
end
def model_name
# TODO: Add a default model_name configuration option?
@model_name ||= extract_model_from_rels || extract_model_from_yaml
\ No newline at end of file