lib/chronicle/etl/transformers/image_file_transformer.rb in chronicle-etl-0.3.1 vs lib/chronicle/etl/transformers/image_file_transformer.rb in chronicle-etl-0.4.0
- old
+ new
@@ -17,25 +17,19 @@
register_connector do |r|
r.identifier = 'image-file'
r.description = 'an image file'
end
- DEFAULT_OPTIONS = {
- timestamp_strategy: 'file_mtime',
- id_strategy: 'file_hash',
- verb: 'photographed',
+ setting :timestamp_strategy, default: 'file_mtime'
+ setting :id_strategy, default: 'file_hash'
+ setting :verb, default: 'photographed'
+ # EXIF tags often don't have timezones
+ setting :timezone_default, default: 'Eastern Time (US & Canada)'
+ setting :include_image_data, default: true
+ setting :actor
+ setting :involved
- # EXIF tags often don't have timezones
- timezone_default: 'Eastern Time (US & Canada)',
- include_image_data: true
- }.freeze
-
- def initialize(*args)
- super(*args)
- @options = @options.reverse_merge(DEFAULT_OPTIONS)
- end
-
def transform
# FIXME: set @filename; use block for reading file when necessary
@file = File.open(@extraction.data)
record = build_created(@file)
@file.close
@@ -46,32 +40,32 @@
@file.path
end
def id
@id ||= begin
- id = build_with_strategy(field: :id, strategy: @options[:id_strategy])
+ id = build_with_strategy(field: :id, strategy: @config.id_strategy)
raise UntransformableRecordError.new("Could not build id", transformation: self) unless id
id
end
end
def timestamp
@timestamp ||= begin
- ts = build_with_strategy(field: :timestamp, strategy: @options[:timestamp_strategy])
+ ts = build_with_strategy(field: :timestamp, strategy: @config.timestamp_strategy)
raise UntransformableRecordError.new("Could not build timestamp", transformation: self) unless ts
ts
end
end
private
def build_created(file)
record = ::Chronicle::ETL::Models::Activity.new
- record.verb = @options[:verb]
- record.provider = @options[:provider]
+ record.verb = @config.verb
+ record.provider = @config.provider
record.provider_id = id
record.end_at = timestamp
record.dedupe_on = [[:provider_id, :verb, :provider]]
record.involved = build_image
@@ -82,38 +76,38 @@
end
def build_actor
actor = ::Chronicle::ETL::Models::Entity.new
actor.represents = 'identity'
- actor.provider = @options[:actor][:provider]
- actor.slug = @options[:actor][:slug]
+ actor.provider = @config.actor[:provider]
+ actor.slug = @config.actor[:slug]
actor.dedupe_on = [[:provider, :slug, :represents]]
actor
end
def build_image
image = ::Chronicle::ETL::Models::Entity.new
- image.represents = @options[:involved][:represents]
+ image.represents = @config.involved[:represents]
image.title = build_title
image.body = exif['Description']
- image.provider = @options[:involved][:provider]
+ image.provider = @config.involved[:provider]
image.provider_id = id
image.assign_attributes(build_gps)
image.dedupe_on = [[:provider, :provider_id, :represents]]
- if @options[:ocr_strategy]
- ocr_text = build_with_strategy(field: :ocr, strategy: @options[:ocr_strategy])
+ if @config.ocr_strategy
+ ocr_text = build_with_strategy(field: :ocr, strategy: @config.ocr_strategy)
image.metadata[:ocr_text] = ocr_text if ocr_text
end
names = extract_people_depicted
tags = extract_keywords(names)
image.depicts = build_people_depicted(names)
image.abouts = build_keywords(tags)
- if @options[:include_image_data]
+ if @config.include_image_data
attachment = ::Chronicle::ETL::Models::Attachment.new
attachment.data = build_image_data
image.attachments = [attachment]
end
@@ -122,11 +116,11 @@
def build_keywords(topics)
topics.map do |topic|
t = ::Chronicle::ETL::Models::Entity.new
t.represents = 'topic'
- t.provider = @options[:involved][:provider]
+ t.provider = @config.involved[:provider]
t.title = topic
t.slug = topic.parameterize
t.dedupe_on = [[:provider, :represents, :slug]]
t
end
@@ -134,11 +128,11 @@
def build_people_depicted(names)
names.map do |name|
identity = ::Chronicle::ETL::Models::Entity.new
identity.represents = 'identity'
- identity.provider = @options[:involved][:provider]
+ identity.provider = @config.involved[:provider]
identity.slug = name.parameterize
identity.title = name
identity.dedupe_on = [[:provider, :represents, :slug]]
identity
end
@@ -197,10 +191,10 @@
# use it instead of UTC
timestamp = timestamp.change(offset: exif['OffsetTimeOriginal'])
elsif false
# TODO: support option of using GPS coordinates to determine timezone
else
- zone = ActiveSupport::TimeZone.new(@options[:timezone_default])
+ zone = ActiveSupport::TimeZone.new(@config.timezone_default)
timestamp = zone.parse(timestamp.asctime)
end
timestamp
end