require 'open3'
require 'securerandom'
require 'tmpdir'
require 'iiif_print/split_pdfs/pdf_image_extraction_service'

module IiifPrint
  module SplitPdfs
    # @abstract
    #
    # The purpose of this class is to split the PDF into constituent image files.
    #
    # @see .call
    class BaseSplitter
      ##
      # @api public
      #
      # Split the PDF at the given path and return the paths of the page images.
      #
      # @param path [String] local path to the PDF that we will split.
      # @return [Enumerable]
      #
      # @see #each
      #
      # @note We're including the ** args to provide method conformity; other services require
      #       additional information (such as the FileSet)
      #
      # @see IiifPrint::SplitPdfs::DerivativeRodeoSplitter
      def self.call(path, **)
        new(path).to_a
      end

      ##
      # @api public
      #
      # Added to allow for fine-tuning of splitting decision such as tenant-based omission
      # @see https://github.com/samvera/hyku/blob/main/app/services/iiif_print/tenant_config.rb
      #
      # @return [Boolean] returns false to not limit the splitting of PDFs
      def self.never_split_pdfs?
        false
      end

      # File extension (without the dot) used for the generated page images.
      # No default is provided here; presumably each concrete splitter sets it.
      class_attribute :image_extension
      # Optional value for Ghostscript's -sCompression switch; omitted when not set.
      class_attribute :compression, default: nil
      # Optional value for Ghostscript's -dJPEGQ switch; omitted when not set.
      class_attribute :quality, default: nil

      # @param path [String] local path to the PDF that we will split.
      # @param tmpdir [String] directory that receives the generated page images. When omitted a
      #   fresh temporary directory is created; this class does not remove it.
      # @param default_dpi [Integer] resolution used when the PDF does not look like scanned media.
      def initialize(path, tmpdir: Dir.mktmpdir, default_dpi: 400)
        @baseid = SecureRandom.uuid
        @pdfpath = path
        @pdfinfo = IiifPrint::SplitPdfs::PdfImageExtractionService.new(pdfpath)
        @tmpdir = tmpdir
        @default_dpi = default_dpi
      end

      # In creating {#each} we get many of the methods of array operation (e.g. #to_a).
      include Enumerable

      # @api public
      #
      # Iterate over the generated page images, converting the PDF on first use.
      #
      # @yieldparam [String] the path to the page's image file.
      # @return [Enumerator] when no block is given
      # @return [Array<String>] the page image paths, when a block is given
      def each(&block)
        return enum_for(:each) unless block_given?

        entries.each(&block)
      end

      # @api private
      #
      # TODO: put this test somewhere to prevent invalid pdfs from crashing the image service.
      #
      # @return [Boolean] true when the extracted image information is incomplete (a nil color
      #   entry, missing width or height) or reports zero pages.
      def invalid_pdf?
        pdfinfo.color.include?(nil) || pdfinfo.width.nil? || pdfinfo.height.nil? || pdfinfo.page_count.zero?
      end

      attr_reader :pdfinfo, :tmpdir, :baseid, :default_dpi, :pdfpath
      private :pdfinfo, :tmpdir, :baseid, :default_dpi, :pdfpath

      private

      # Paths of the generated page images, one per page. The conversion runs once per instance
      # and the result is memoized.
      #
      # @return [Array<String>]
      def entries
        return @entries if defined? @entries

        @entries = Array.wrap(gsconvert)
      end

      # Run Ghostscript to convert every page of the PDF into an image file.
      #
      # The command is passed as an argument vector (no shell), so paths containing spaces or
      # shell metacharacters are handed to Ghostscript verbatim. Open3.capture3 drains stdout and
      # stderr together, so a chatty stderr cannot stall the read of stdout.
      #
      # The exit status is not inspected: only pages that Ghostscript announced on stdout are
      # reported.
      #
      # @return [Array<String>] expected paths of the generated page images, in page order.
      def gsconvert
        stdout, _stderr, _status = Open3.capture3('gs', *gs_arguments)

        # Ghostscript prints one "Page N" line per processed page; output files are numbered
        # from 1 by the %d placeholder in -sOutputFile.
        pages_written = stdout.each_line.count { |line| line.start_with?('Page ') }
        (1..pages_written).map do |page_number|
          File.join(tmpdir, "#{baseid}-page#{page_number}.#{image_extension}")
        end
      end

      # Build the argument vector for the Ghostscript call.
      #
      # @return [Array<String>]
      def gs_arguments
        # NOTE: you must call gsdevice before compression, as compression is
        # updated during the gsdevice call.
        args = ['-dNOPAUSE', '-dBATCH', "-sDEVICE=#{gsdevice}", '-dTextAlphaBits=4']
        args << "-sCompression=#{compression}" if compression?
        args << "-dJPEGQ=#{quality}" if quality?

        output_pattern = File.join(tmpdir, "#{baseid}-page%d.#{image_extension}")
        args.push("-sOutputFile=#{output_pattern}", "-r#{ppi}", '-f', pdfpath.to_s)
      end

      # Name of the Ghostscript output device; concrete splitters must implement this.
      #
      # @return [String]
      # @raise [NotImplementedError] always, in this abstract base class.
      def gsdevice
        raise NotImplementedError
      end

      PAGE_COUNT_REGEXP = %r{^Pages: +(\d+)$}.freeze

      # Number of pages in the PDF as reported by the `pdfinfo` command line tool. Memoized.
      #
      # @return [Integer] the page count, or 0 when the output has no "Pages:" line (for example
      #   because the file could not be read as a PDF).
      def pagecount
        return @pagecount if defined? @pagecount

        # Argument-vector form: the path is never interpreted by a shell.
        stdout, _stderr, _status = Open3.capture3('pdfinfo', pdfpath.to_s)
        match = PAGE_COUNT_REGEXP.match(stdout)
        @pagecount = match ? match[1].to_i : 0
      end

      # Resolution handed to Ghostscript's -r switch.
      #
      # @return [Numeric]
      def ppi
        if looks_scanned?
          # For scanned media, defer to detected image PPI:
          pdfinfo.ppi
        else
          # 400 dpi for something that does not look like scanned media:
          default_dpi
        end
      end

      # Heuristic: the PDF is treated as scanned media when it holds one image per page and the
      # largest image dimensions exceed roughly ten megapixels.
      #
      # @return [Boolean]
      def looks_scanned?
        width = pdfinfo.width
        height = pdfinfo.height
        # Missing dimensions (see #invalid_pdf?) give no evidence of a scan.
        return false if width.nil? || height.nil?

        # single 10mp+ image per page?
        single_image_per_page? && width * height > 1024 * 1024 * 10
      end

      # @return [Boolean] true when the number of detected images equals the number of pages.
      def single_image_per_page?
        pdfinfo.page_count == pagecount
      end
    end
  end
end

require "iiif_print/split_pdfs/pages_to_jpgs_splitter"
require "iiif_print/split_pdfs/pages_to_pngs_splitter"
require "iiif_print/split_pdfs/pages_to_tiffs_splitter"