# -*- encoding: utf-8 -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2016 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#++

require 'hexapdf/cli'

module HexaPDF
  module CLI

    # Extracts files from a PDF file.
    #
    # See: HexaPDF::Type::EmbeddedFile
    class Extract < CmdParse::Command

      # Sets up the command name, its descriptions and the command line options and initializes
      # the option defaults (no indices, empty password, no whole-document search).
      def initialize #:nodoc:
        super('extract', takes_commands: false)
        short_desc("Extract files from a PDF file")
        # The non-destructive #gsub is used on purpose: #gsub! returns nil if nothing was replaced
        # and fails on frozen string literals.
        long_desc(<<-EOF.gsub(/^ */, ''))
          This command extracts files embedded in a PDF file. If the option --indices is not given,
          the available files are listed with their names and indices. The --indices option can then
          be used to extract one or more files.
        EOF
        options.on("--indices a,b,c", "-i a,b,c,...", Array,
                   "The indices of the files that should be extracted. Use 0 to extract " \
                   "all files.") do |indices|
          @indices = indices.map {|index| parse_index(index) }
        end
        options.on("--[no-]search", "-s", "Search the whole PDF instead of the " \
                   "standard locations (default: false)") do |search|
          @search = search
        end
        options.on("--password PASSWORD", "-p", String,
                   "The password for decryption. Use - for reading from standard input.") do |pwd|
          @password = (pwd == '-' ? command_parser.read_password : pwd)
        end
        @indices = []
        @password = ''
        @search = false
      end

      # Opens the given PDF +file+ and either lists the embedded files (no indices given) or
      # extracts the selected ones.
      #
      # A HexaPDF::Error raised while processing is reported on standard error and the process
      # exits with status 1.
      def execute(file) #:nodoc:
        HexaPDF::Document.open(file, decryption_opts: {password: @password}) do |doc|
          if @indices.empty?
            list_files(doc)
          else
            extract_files(doc)
          end
        end
      rescue HexaPDF::Error => e
        $stderr.puts "Error while processing the PDF file: #{e.message}"
        exit(1)
      end

      private

      # Converts a single value of the --indices option into an integer.
      #
      # Raises OptionParser::InvalidArgument for values that are not decimal integers. A lenient
      # conversion via #to_i would turn such values (e.g. a typo or an empty list entry) into 0
      # which means "extract all files".
      def parse_index(value)
        Integer(value, 10)
      rescue ArgumentError, TypeError
        raise OptionParser::InvalidArgument, value.to_s
      end

      # Outputs the list of files embedded in the given PDF document.
      #
      # Each entry shows the one-based index and the file name, followed by the size, checksum and
      # dates if they are available in the parameters of the embedded file stream, and the
      # description on a separate line if there is one.
      def list_files(doc)
        each_file(doc) do |obj, index|
          $stdout.write(sprintf("%4i: %s", index + 1, obj.path))
          ef_stream = obj.embedded_file_stream
          if (params = ef_stream[:Params]) && !params.empty?
            data = []
            data << "size: #{params[:Size]}" if params.key?(:Size)
            data << "md5: #{params[:CheckSum].unpack('H*').first}" if params.key?(:CheckSum)
            data << "ctime: #{params[:CreationDate]}" if params.key?(:CreationDate)
            data << "mtime: #{params[:ModDate]}" if params.key?(:ModDate)
            # The parameters may contain only entries that are not shown here; avoid printing
            # empty parentheses in that case.
            $stdout.write(" (#{data.join(', ')})") unless data.empty?
          end
          $stdout.puts
          $stdout.puts("      #{obj[:Desc]}") if obj[:Desc] && !obj[:Desc].empty?
        end
      end

      # Extracts the files with the given indices into the current working directory.
      #
      # Raises HexaPDF::Error if an output file already exists or if the name of an embedded file
      # can't be used as output file name.
      def extract_files(doc)
        each_file(doc) do |obj, index|
          next unless @indices.include?(index + 1) || @indices.include?(0)
          path = output_path(obj.path)
          if File.exist?(path)
            raise HexaPDF::Error, "Output file #{path} already exists, not overwriting"
          end
          puts "Extracting #{path}..."
          File.open(path, 'wb') do |file|
            # The stream decoder is resumed until it is finished or yields no more data.
            fiber = obj.embedded_file_stream.stream_decoder
            while fiber.alive? && (data = fiber.resume)
              file << data
            end
          end
        end
      end

      # Returns the output file name for the given name of an embedded file.
      #
      # The name comes from the PDF file and is therefore not trustworthy. Only its last path
      # component is used so that a crafted name (absolute path, "../" components) can't make the
      # command write outside of the current working directory.
      #
      # Raises HexaPDF::Error if no usable file name remains.
      def output_path(name)
        basename = File.basename(name)
        if basename.empty? || basename == '.' || basename == '..' || basename == File::SEPARATOR
          raise HexaPDF::Error, "Embedded file name #{name.inspect} can't be used as output file name"
        end
        basename
      end

      # Iterates over all embedded files, yielding each file object together with its zero-based
      # index.
      def each_file(doc, &block) # :yields: obj, index
        doc.files.each(search: @search).select(&:embedded_file?).each_with_index(&block)
      end

    end

  end
end