require 'digest'
require 'mini_exiftool'
require 'active_support'
require 'active_support/core_ext/object'
require 'active_support/core_ext/time'
require 'active_support/core_ext/hash/reverse_merge'
require 'active_support/core_ext/string/inflections'

module Chronicle
  module ETL
    # Transform a JPEG or other image file into a record.
    # By default, file mtime and a hash of the file content is used to build
    # the timestamp and ID respectively but other options are available (such
    # as reading EXIF tags or extended attributes from the filesystem).
    #
    # TODO: This should be extracted into its own plugin
    class ImageFileTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = 'image-file'
        r.description = 'an image file'
      end

      # Strategy name (or list of names) tried in order to build the record
      # timestamp; each name maps to a `build_timestamp_using_<name>` method
      setting :timestamp_strategy, default: 'file_mtime'
      # Strategy name (or list of names) tried in order to build the record
      # id; each name maps to a `build_id_using_<name>` method
      setting :id_strategy, default: 'file_hash'
      # Verb assigned to the activity record that is built
      setting :verb, default: 'photographed'
      # EXIF tags often don't have timezones; this zone is applied to
      # DateTimeOriginal when no OffsetTimeOriginal tag is present
      setting :timezone_default, default: 'Eastern Time (US & Canada)'
      # When truthy, the image contents are attached to the image entity
      setting :include_image_data, default: true
      # Read with [:provider] and [:slug] when building the actor entity
      setting :actor
      # Read with [:represents] and [:provider] when building the image,
      # keyword and depicted-people entities
      setting :involved

      # Open the extracted file and build an activity record from it.
      #
      # The handle is stored in @file because the other builders in this
      # class read from it. It is closed in an `ensure` so that a failure
      # while building the record does not leak the open file.
      #
      # Returns whatever build_created returns; any error it raises propagates.
      def transform
        # FIXME: set @filename; use block for reading file when necessary
        @file = File.open(@extraction.data)
        begin
          build_created(@file)
        ensure
          @file.close
        end
      end

      # Path of the file being transformed.
      #
      # Returns nil instead of raising NoMethodError when no file has been
      # opened yet (for example when opening the file in #transform failed).
      def friendly_identifier
        @file.path if @file
      end

      # Identifier of the record, built with the configured id strategy and
      # cached after the first successful build.
      #
      # Raises UntransformableRecordError when no strategy yields a value.
      def id
        return @id if @id

        built = build_with_strategy(field: :id, strategy: @config.id_strategy)
        raise(UntransformableRecordError, "Could not build id") unless built

        @id = built
      end

      # Timestamp of the record, built with the configured timestamp strategy
      # and cached after the first successful build.
      #
      # Raises UntransformableRecordError when no strategy yields a value.
      def timestamp
        return @timestamp if @timestamp

        built = build_with_strategy(field: :timestamp, strategy: @config.timestamp_strategy)
        raise(UntransformableRecordError, "Could not build timestamp") unless built

        @timestamp = built
      end

      private

      # Assemble the activity record for the image: verb and provider come
      # from the configuration, the id and end time from the configured
      # strategies, and the image, actor and GPS attributes from the builders
      # below.
      #
      # The `file` argument is not read here; the builders use @file.
      def build_created(file)
        ::Chronicle::ETL::Models::Activity.new.tap do |activity|
          activity.verb = @config.verb
          activity.provider = @config.provider
          activity.provider_id = id
          activity.end_at = timestamp
          activity.dedupe_on = [%i[provider_id verb provider]]

          activity.involved = build_image
          activity.actor = build_actor

          activity.assign_attributes(build_gps)
        end
      end

      # Build the 'identity' entity for the actor, taking its provider and
      # slug from the `actor` setting.
      def build_actor
        ::Chronicle::ETL::Models::Entity.new.tap do |identity|
          identity.represents = 'identity'
          identity.provider = @config.actor[:provider]
          identity.slug = @config.actor[:slug]
          identity.dedupe_on = [%i[provider slug represents]]
        end
      end

      # Build the entity describing the image itself: title, description,
      # provider details and GPS attributes, plus optional OCR text, depicted
      # people, keyword topics and (when enabled) the image data attachment.
      def build_image
        entity = ::Chronicle::ETL::Models::Entity.new
        entity.represents = @config.involved[:represents]
        entity.title = build_title
        entity.body = exif['Description']
        entity.provider = @config.involved[:provider]
        entity.provider_id = id
        entity.assign_attributes(build_gps)
        entity.dedupe_on = [%i[provider provider_id represents]]

        # OCR only runs when a strategy is configured; empty results are skipped
        ocr_strategy = @config.ocr_strategy
        recognized = build_with_strategy(field: :ocr, strategy: ocr_strategy) if ocr_strategy
        entity.metadata[:ocr_text] = recognized if recognized

        # Names of depicted people are removed from the keyword list
        people = extract_people_depicted
        keywords = extract_keywords(people)

        entity.depicts = build_people_depicted(people)
        entity.abouts = build_keywords(keywords)

        if @config.include_image_data
          entity.attachments = [
            ::Chronicle::ETL::Models::Attachment.new.tap { |file_data| file_data.data = build_image_data }
          ]
        end

        entity
      end

      # Turn each keyword into a 'topic' entity whose slug is the
      # parameterized keyword and whose provider comes from the `involved`
      # setting.
      def build_keywords(topics)
        topics.map do |keyword|
          ::Chronicle::ETL::Models::Entity.new.tap do |entity|
            entity.represents = 'topic'
            entity.provider = @config.involved[:provider]
            entity.title = keyword
            entity.slug = keyword.parameterize
            entity.dedupe_on = [%i[provider represents slug]]
          end
        end
      end

      # Turn each person's name into an 'identity' entity whose slug is the
      # parameterized name and whose provider comes from the `involved`
      # setting.
      def build_people_depicted(names)
        names.map do |person|
          ::Chronicle::ETL::Models::Entity.new.tap do |entity|
            entity.represents = 'identity'
            entity.provider = @config.involved[:provider]
            entity.slug = person.parameterize
            entity.title = person
            entity.dedupe_on = [%i[provider represents slug]]
          end
        end
      end

      # GPS attributes read from the EXIF data, or an empty hash when the
      # image has no GPSLatitude tag.
      def build_gps
        latitude = exif['GPSLatitude']
        return {} unless latitude

        { lat: latitude, lng: exif['GPSLongitude'], elevation: exif['GPSAltitude'] }
      end

      # Image contents for the attachment, produced by the BinaryAttachments
      # helper from the path of the opened file.
      def build_image_data
        image_path = @file.path
        ::Chronicle::ETL::Utils::BinaryAttachments.filename_to_base64(filename: image_path)
      end

      # Title of the image entity: the file name without its directory.
      def build_title
        File.basename(@file.path)
      end

      # Try each strategy in order by calling `build_<field>_using_<strategy>`
      # and return the first truthy result.
      #
      # `strategy` may be a single name, a (nested) list of names, or nil.
      # Returns nil when no strategy is given or none yields a value.
      def build_with_strategy(field:, strategy: [])
        [strategy].flatten.compact.each do |name|
          built = send(:"build_#{field}_using_#{name}")
          return built if built
        end

        nil
      end

      # SHA-256 hex digest of the file contents.
      #
      # The digest is computed from the file on disk in binary mode rather
      # than from a text-mode `File.read`, so image bytes are hashed
      # unmodified and the whole file is not held in memory as one String.
      def build_id_using_file_hash
        Digest::SHA256.file(@file.path).hexdigest
      end

      # Id taken from the file's `com.apple.metadata:kMDItemVersion` extended
      # attribute, as read by load_value_from_xattr_plist.
      def build_id_using_xattr_version
        attribute_name = "com.apple.metadata:kMDItemVersion"
        load_value_from_xattr_plist(attribute_name)
      end

      # Id taken from the OriginalDocumentID tag, falling back to
      # DerivedFromDocumentID when the former is missing.
      def build_id_using_xmp_document_id
        original_id = exif['OriginalDocumentID']
        return original_id if original_id

        exif['DerivedFromDocumentID']
      end

      # Timestamp taken from the modification time of the opened file.
      def build_timestamp_using_file_mtime
        @file.mtime
      end

      # Timestamp taken from the EXIF DateTimeOriginal tag.
      #
      # Returns nil when the tag is missing. When OffsetTimeOriginal is
      # present the timestamp is moved to that offset; otherwise it is
      # interpreted in the `timezone_default` zone.
      def build_timestamp_using_exif_datetimeoriginal
        # EXIF tags don't have timezone information. This is a DateTime in UTC
        original = exif['DateTimeOriginal']
        return unless original

        # Offset tags are only available in newer EXIF tags. If it exists, we
        # use it instead of UTC
        offset = exif['OffsetTimeOriginal']
        return original.change(offset: offset) if offset

        # TODO: support option of using GPS coordinates to determine timezone
        zone = ActiveSupport::TimeZone.new(@config.timezone_default)
        zone.parse(original.asctime)
      end

      # TODO: add documentation for how to set up `macocr`
      # Run `macocr` on the image and return its output, or nil when the
      # output is blank.
      #
      # The command is run with an argument vector instead of an interpolated
      # shell string, so quotes or shell metacharacters in the file path cannot
      # break or inject into the command. stderr is discarded, and a `macocr`
      # that cannot be started yields nil.
      def build_ocr_using_macocr
        text = IO.popen(['macocr', @file.path], err: File::NULL, &:read)
        text.presence
      rescue SystemCallError
        # macocr is missing or not executable: no OCR text available
        nil
      end

      # Metadata reader for the opened file, created on first use and reused
      # afterwards.
      def exif
        return @exif if @exif

        # EXIF timestamps don't have timezone information. MiniExifTool uses Time
        # by default which parses timestamps in local time zone. Using DateTime
        # parses dates as UTC and then we can apply a timezone offset if the optional
        # EXIF timezone offset fields are available.
        # https://github.com/janfri/mini_exiftool/issues/39#issuecomment-832587649
        @exif = MiniExiftool.new(@file.path, numerical: true, timestamps: DateTime)
      end

      # Figure out which faces are tagged as regions and return a list of their names
      # Region names are paired with region types by position; only names
      # whose type is 'Face' are kept, without duplicates. Returns an empty
      # list when the image has no RegionName tag.
      def extract_people_depicted
        region_names = exif['RegionName']
        return [] unless region_names

        region_types = [exif['RegionType']].flatten
        faces = [region_names].flatten.zip(region_types).each_with_object([]) do |(name, type), found|
          found << name if type == 'Face'
        end
        faces.uniq
      end

      # Extract image keywords from the EXIF/IPTC tag and subtract out those that
      # are tagged people (determined by looking at face regions)
      # Returns the values of the Keywords tag (a single value or a list) with
      # any entries from `people_names` removed; an empty list when the tag is
      # absent.
      def extract_keywords(people_names = [])
        raw_keywords = exif['Keywords'] || []
        keyword_list = [raw_keywords].flatten
        keyword_list - people_names
      end

      # Read an extended attribute of the file, convert the plist it contains
      # to XML and return the concatenated text of its <string> elements.
      #
      # Returns nil when the pipeline produces no output or the text is blank.
      # The attribute name and file path are shell-escaped because the
      # pipeline has to run through a shell.
      def load_value_from_xattr_plist(attribute)
        require 'nokogiri'
        require 'shellwords'

        escaped_attribute = Shellwords.escape(attribute.to_s)
        escaped_path = Shellwords.escape(@file.path)
        xml = `xattr -p #{escaped_attribute} #{escaped_path} | xxd -r -p | plutil -convert xml1 -o - -- - 2>/dev/null`
        # Backticks always return a String, so check for empty output
        return if xml.empty?

        Nokogiri::XML.parse(xml).xpath("//string").text.presence
      end
    end
  end
end