lib/chronicle/etl/transformers/image_file_transformer.rb in chronicle-etl-0.3.1 vs lib/chronicle/etl/transformers/image_file_transformer.rb in chronicle-etl-0.4.0

- old
+ new

@@ -17,25 +17,19 @@ register_connector do |r| r.identifier = 'image-file' r.description = 'an image file' end - DEFAULT_OPTIONS = { - timestamp_strategy: 'file_mtime', - id_strategy: 'file_hash', - verb: 'photographed', + setting :timestamp_strategy, default: 'file_mtime' + setting :id_strategy, default: 'file_hash' + setting :verb, default: 'photographed' + # EXIF tags often don't have timezones + setting :timezone_default, default: 'Eastern Time (US & Canada)' + setting :include_image_data, default: true + setting :actor + setting :involved - # EXIF tags often don't have timezones - timezone_default: 'Eastern Time (US & Canada)', - include_image_data: true - }.freeze - - def initialize(*args) - super(*args) - @options = @options.reverse_merge(DEFAULT_OPTIONS) - end - def transform # FIXME: set @filename; use block for reading file when necessary @file = File.open(@extraction.data) record = build_created(@file) @file.close @@ -46,32 +40,32 @@ @file.path end def id @id ||= begin - id = build_with_strategy(field: :id, strategy: @options[:id_strategy]) + id = build_with_strategy(field: :id, strategy: @config.id_strategy) raise UntransformableRecordError.new("Could not build id", transformation: self) unless id id end end def timestamp @timestamp ||= begin - ts = build_with_strategy(field: :timestamp, strategy: @options[:timestamp_strategy]) + ts = build_with_strategy(field: :timestamp, strategy: @config.timestamp_strategy) raise UntransformableRecordError.new("Could not build timestamp", transformation: self) unless ts ts end end private def build_created(file) record = ::Chronicle::ETL::Models::Activity.new - record.verb = @options[:verb] - record.provider = @options[:provider] + record.verb = @config.verb + record.provider = @config.provider record.provider_id = id record.end_at = timestamp record.dedupe_on = [[:provider_id, :verb, :provider]] record.involved = build_image @@ -82,38 +76,38 @@ end def build_actor actor = ::Chronicle::ETL::Models::Entity.new actor.represents = 'identity' - actor.provider = @options[:actor][:provider] - actor.slug = @options[:actor][:slug] + actor.provider = @config.actor[:provider] + actor.slug = @config.actor[:slug] actor.dedupe_on = [[:provider, :slug, :represents]] actor end def build_image image = ::Chronicle::ETL::Models::Entity.new - image.represents = @options[:involved][:represents] + image.represents = @config.involved[:represents] image.title = build_title image.body = exif['Description'] - image.provider = @options[:involved][:provider] + image.provider = @config.involved[:provider] image.provider_id = id image.assign_attributes(build_gps) image.dedupe_on = [[:provider, :provider_id, :represents]] - if @options[:ocr_strategy] - ocr_text = build_with_strategy(field: :ocr, strategy: @options[:ocr_strategy]) + if @config.ocr_strategy + ocr_text = build_with_strategy(field: :ocr, strategy: @config.ocr_strategy) image.metadata[:ocr_text] = ocr_text if ocr_text end names = extract_people_depicted tags = extract_keywords(names) image.depicts = build_people_depicted(names) image.abouts = build_keywords(tags) - if @options[:include_image_data] + if @config.include_image_data attachment = ::Chronicle::ETL::Models::Attachment.new attachment.data = build_image_data image.attachments = [attachment] end @@ -122,11 +116,11 @@ def build_keywords(topics) topics.map do |topic| t = ::Chronicle::ETL::Models::Entity.new t.represents = 'topic' - t.provider = @options[:involved][:provider] + t.provider = @config.involved[:provider] t.title = topic t.slug = topic.parameterize t.dedupe_on = [[:provider, :represents, :slug]] t end @@ -134,11 +128,11 @@ def build_people_depicted(names) names.map do |name| identity = ::Chronicle::ETL::Models::Entity.new identity.represents = 'identity' - identity.provider = @options[:involved][:provider] + identity.provider = @config.involved[:provider] identity.slug = name.parameterize identity.title = name identity.dedupe_on = [[:provider, :represents, :slug]] identity end @@ -197,10 +191,10 @@ # use it instead of UTC timestamp = timestamp.change(offset: exif['OffsetTimeOriginal']) elsif false # TODO: support option of using GPS coordinates to determine timezone else - zone = ActiveSupport::TimeZone.new(@options[:timezone_default]) + zone = ActiveSupport::TimeZone.new(@config.timezone_default) timestamp = zone.parse(timestamp.asctime) end timestamp end