# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2023 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see .
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at .
#++

require 'hexapdf/cli/command'

module HexaPDF
  module CLI

    # Lists or extracts embedded files from a PDF file.
    #
    # Without the --extract option the embedded files are listed together with their one-based
    # indices; with it, the files matching the given indices are written to disk.
    #
    # See: HexaPDF::Type::EmbeddedFile
    class Files < Command

      def initialize #:nodoc:
        super('files', takes_commands: false)
        short_desc("List or extract embedded files from a PDF file")
        long_desc(<<~EOF)
          If the option --extract is not given, the available files are listed with their names and indices.
          The --extract option can then be used to extract one or more files.
        EOF

        # The argument of --extract is optional. When it is missing, +indices+ is +nil+ and the
        # special index 0 ("all files") is used. Note that String#to_i turns non-numeric entries
        # into 0 as well, so such entries also select all files.
        options.on("--extract [a,b,c,...]", "-e [a,b,c,...]", Array,
                   "The indices of the files that should be extracted. Use 0 or no argument to " \
                   "extract all files.") do |indices|
          @indices = (indices ? indices.map(&:to_i) : [0])
        end
        options.on("--[no-]search", "-s", "Search the whole PDF instead of the " \
                   "standard locations (default: false)") do |search|
          @search = search
        end
        options.on("--password PASSWORD", "-p", String,
                   "The password for decryption. Use - for reading from standard input.") do |pwd|
          @password = (pwd == '-' ? read_password : pwd)
        end

        # Defaults: an empty index list means "list the files instead of extracting them".
        @indices = []
        @password = nil
        @search = false
      end

      # Lists the embedded files if no indices were given on the command line, otherwise extracts
      # the selected files.
      def execute(pdf) #:nodoc:
        with_document(pdf, password: @password) do |doc|
          if @indices.empty?
            list_files(doc)
          else
            extract_files(doc)
          end
        end
      end

      private

      # Outputs the list of files embedded in the given PDF document.
      #
      # Each entry consists of the one-based index and the path of the file, optionally followed
      # by the known entries of the embedded file stream's /Params dictionary (size, checksum,
      # creation and modification date) and, on a separate line, the file's description.
      def list_files(doc)
        each_file(doc) do |obj, index|
          $stdout.write(sprintf("%4i: %s", index + 1, obj.path))
          ef_stream = obj.embedded_file_stream
          if (params = ef_stream[:Params]) && !params.empty?
            data = []
            data << "size: #{params[:Size]}" if params.key?(:Size)
            data << "md5: #{params[:CheckSum].unpack1('H*')}" if params.key?(:CheckSum)
            data << "ctime: #{params[:CreationDate]}" if params.key?(:CreationDate)
            data << "mtime: #{params[:ModDate]}" if params.key?(:ModDate)
            # The dictionary may be non-empty without containing any of the keys shown above; in
            # that case nothing is written so that no empty " ()" appears after the file name.
            $stdout.write(" (#{data.join(', ')})") unless data.empty?
          end
          $stdout.puts
          $stdout.puts(" #{obj[:Desc]}") if obj[:Desc] && !obj[:Desc].empty?
        end
      end

      # Extracts the files with the given indices.
      #
      # The indices in @indices are one-based; the special index 0 selects all files. Each file is
      # written in binary mode to the path stored in its file specification.
      def extract_files(doc)
        # Whether all files are requested does not change during the iteration.
        extract_all = @indices.include?(0)
        each_file(doc) do |obj, index|
          next unless extract_all || @indices.include?(index + 1)

          # NOTE(review): obj.path is read from the PDF and used verbatim as the output path. It
          # looks like absolute paths or paths containing ".." would be honored here - confirm
          # whether the path should be restricted to the current directory.
          maybe_raise_on_existing_file(obj.path)
          puts "Extracting #{obj.path}..." if command_parser.verbosity_info?
          File.open(obj.path, 'wb') do |file|
            # The decoded stream data is pulled chunk by chunk from the fiber and appended to the
            # file until the fiber is finished or returns no more data.
            fiber = obj.embedded_file_stream.stream_decoder
            while fiber.alive? && (data = fiber.resume)
              file << data
            end
          end
        end
      end

      # Iterates over all embedded files.
      #
      # Only file specifications that answer +embedded_file?+ with true are yielded, together with
      # their zero-based position among those files. The @search flag is passed through to
      # +doc.files.each+.
      def each_file(doc, &block) # :yields: obj, index
        doc.files.each(search: @search).select(&:embedded_file?).each_with_index(&block)
      end

    end

  end
end