# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2023 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
#++
require 'hexapdf/cli/command'
module HexaPDF
module CLI
# Lists or extracts embedded files from a PDF file.
#
# See: HexaPDF::Type::EmbeddedFile
class Files < Command
def initialize #:nodoc:
  super('files', takes_commands: false)

  # Values used when the corresponding option is not given on the command line.
  @indices = []
  @password = nil
  @search = false

  short_desc("List or extract embedded files from a PDF file")
  long_desc(<<~EOF)
    If the option --extract is not given, the available files are listed with their names and
    indices. The --extract option can then be used to extract one or more files.
  EOF

  # A missing argument is treated like index 0, which stands for "all files".
  options.on("--extract [a,b,c,...]", "-e [a,b,c,...]", Array,
             "The indices of the files that should be extracted. Use 0 or no argument to " \
             "extract all files.") do |indices|
    @indices = (indices || ['0']).map(&:to_i)
  end

  options.on("--[no-]search", "-s", "Search the whole PDF instead of the " \
             "standard locations (default: false)") do |flag|
    @search = flag
  end

  # A single dash means that the password is requested via #read_password instead of being
  # taken from the command line.
  options.on("--password PASSWORD", "-p", String,
             "The password for decryption. Use - for reading from standard input.") do |pwd|
    @password = if pwd == '-'
                  read_password
                else
                  pwd
                end
  end
end
# Opens the given PDF file (using the password given on the command line, if any) and either
# lists the embedded files (no indices were requested) or extracts the requested ones.
def execute(pdf) #:nodoc:
  with_document(pdf, password: @password) do |doc|
    @indices.empty? ? list_files(doc) : extract_files(doc)
  end
end
private
# Outputs the list of files embedded in the given PDF document.
#
# Each entry is written to $stdout as the one-based index and the path of the file. If the
# embedded file stream has a /Params dictionary, the size, MD5 checksum (hex-encoded), creation
# date and modification date found there are appended in parentheses. A non-empty description
# (/Desc) is printed on a separate line.
def list_files(doc)
  each_file(doc) do |obj, index|
    $stdout.write(sprintf("%4i: %s", index + 1, obj.path))

    params = obj.embedded_file_stream[:Params]
    if params && !params.empty?
      data = []
      data << "size: #{params[:Size]}" if params.key?(:Size)
      data << "md5: #{params[:CheckSum].unpack1('H*')}" if params.key?(:CheckSum)
      data << "ctime: #{params[:CreationDate]}" if params.key?(:CreationDate)
      data << "mtime: #{params[:ModDate]}" if params.key?(:ModDate)
      # /Params may be non-empty but contain none of the entries shown above; in that case
      # nothing is appended instead of a dangling " ()".
      $stdout.write(" (#{data.join(', ')})") unless data.empty?
    end
    $stdout.puts

    desc = obj[:Desc]
    $stdout.puts(" #{desc}") if desc && !desc.empty?
  end
end
# Extracts the files with the given indices.
#
# A file is extracted if its one-based index is contained in @indices or if @indices contains
# 0 (meaning "all files"). The decoded stream data is written in binary mode to a file in the
# current working directory that is named after the last component of the embedded file's path.
def extract_files(doc)
  each_file(doc) do |obj, index|
    next unless @indices.include?(index + 1) || @indices.include?(0)

    # The file name is taken from the PDF and therefore untrusted. Only its base name is used
    # so that names like "../x" or "/abs/x" cannot cause a write outside the current directory.
    target = File.basename(obj.path)

    # NOTE(review): presumably aborts when the target already exists and overwriting was not
    # requested - the helper is defined outside this file.
    maybe_raise_on_existing_file(target)
    puts "Extracting #{target}..." if command_parser.verbosity_info?
    File.open(target, 'wb') do |file|
      # The decoder fiber hands out the data chunk by chunk; a falsy value ends the loop.
      fiber = obj.embedded_file_stream.stream_decoder
      while fiber.alive? && (data = fiber.resume)
        file << data
      end
    end
  end
end
# Yields every file specification that has an embedded file together with its zero-based
# position among those files. The @search flag is passed on to the document's file iterator.
def each_file(doc, &block) # :yields: obj, index
  candidates = doc.files.each(search: @search)
  embedded = candidates.select { |spec| spec.embedded_file? }
  embedded.each_with_index(&block)
end
end
end
end