lib/hexapdf/cli/modify.rb in hexapdf-0.1.0 vs lib/hexapdf/cli/modify.rb in hexapdf-0.2.0
- old
+ new
@@ -37,32 +37,62 @@
module HexaPDF
module CLI
# Modifies a PDF file:
#
- # * Decrypts or encrypts the PDF file.
+ # * Adds pages from other PDF files.
+ # * Decrypts or encrypts the resulting output PDF file.
# * Generates or deletes object and cross-reference streams.
- # * Optimizes a PDF by merging the revisions of a PDF file and removes unused entries.
+ # * Optimizes the output PDF by merging the revisions of a PDF file and removing unused entries.
#
# See: HexaPDF::Task::Optimize
class Modify < CmdParse::Command
+ # Bundles one --file argument with the page specification (--pages) and the decryption
+ # password (--password) that apply to it.
+ InputSpec = Struct.new(:file, :pages, :password) #:nodoc:
+
def initialize #:nodoc:
super('modify', takes_commands: false)
short_desc("Modify a PDF file")
long_desc(<<-EOF.gsub!(/^ */, ''))
- This command modifies a PDF file. It can be used to encrypt/decrypt a file, to optimize it
- and remove unused entries and to generate or delete object and cross-reference streams.
+ This command modifies a PDF file. It can be used to select pages that should appear in
+ the output file and to add pages from other PDF files. The output file can be
+ encrypted/decrypted and optimized in various ways.
+
+ The first input file is the primary file which gets modified, so meta data like file
+ information, outlines, etc. are taken from it. Alternatively, it is possible to start
+ with an empty PDF file by using --empty. The order of the options specifying the files
+ is important as they are used in that order.
+
+ Also note that the --password and --pages options apply to the last preceding input file.
EOF
- options.on("--password PASSWORD", "-p", String,
- "The password for decryption. Use - for reading from standard input.") do |pwd|
- @password = (pwd == '-' ? command_parser.read_password("Input file password") : pwd)
+ options.separator("")
+ options.separator("Input file(s) related options")
+ options.on("-f", "--file FILE", "Input file, can be specified multiple times") do |file|
+ @files << InputSpec.new(file, '1-e')
end
- options.on("--pages PAGES", "The pages to be used in the output file") do |pages|
- @pages = pages
+ options.on("-p", "--password PASSWORD", String, "The password for decrypting the last " \
+ "specified input file (use - for reading from standard input)") do |pwd|
+ raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
+ pwd = (pwd == '-' ? command_parser.read_password("#{@files.last.file} password") : pwd)
+ @files.last.password = pwd
end
+ options.on("-i", "--pages PAGES", "The pages of the last specified input file that " \
+ "should be used (default: 1-e)") do |pages|
+ raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
+ @files.last.pages = pages
+ end
+ options.on("-e", "--empty", "Use an empty file as the first input file") do
+ @initial_empty = true
+ end
+ options.on("--[no-]interleave", "Interleave the pages from the input files (default: " \
+ "false)") do |c|
+ @interleave = c
+ end
+
+ options.separator("")
+ options.separator("Output file related options")
options.on("--embed FILE", String, "Embed the file into the output file (can be used " \
"multiple times)") do |file|
@embed_files << file
end
options.on("--[no-]compact", "Delete unnecessary PDF objects (default: yes)") do |c|
@@ -81,26 +111,27 @@
options.on("--streams MODE", [:compress, :preserve, :uncompress],
"Handling of stream data (either compress, preserve or uncompress; default: " \
"preserve)") do |streams|
@streams = streams
end
-
- options.separator("")
- options.separator("Encryption related options")
+ options.on("--[no-]compress-pages", "Recompress page content streams (may take a long " \
+ "time; default: no)") do |c|
+ @compress_pages = c
+ end
options.on("--decrypt", "Remove any encryption") do
@encryption = :remove
end
options.on("--encrypt", "Encrypt the output file") do
@encryption = :add
end
options.on("--owner-password PASSWORD", String, "The owner password to be set on the " \
- "output file. Use - for reading from standard input.") do |pwd|
+ "output file (use - for reading from standard input)") do |pwd|
@encryption = :add
@enc_owner_pwd = (pwd == '-' ? command_parser.read_password("Owner password") : pwd)
end
options.on("--user-password PASSWORD", String, "The user password to be set on the " \
- "output file. Use - for reading from standard input.") do |pwd|
+ "output file (use - for reading from standard input)") do |pwd|
@encryption = :add
@enc_user_pwd = (pwd == '-' ? command_parser.read_password("User password") : pwd)
end
options.on("--algorithm ALGORITHM", [:aes, :arc4],
"The encryption algorithm: aes or arc4 (default: aes)") do |a|
@@ -119,23 +150,27 @@
end
syms = HexaPDF::Encryption::StandardSecurityHandler::Permissions::SYMBOL_TO_PERMISSION.keys
options.on("--permissions PERMS", Array,
"Comma separated list of permissions to be set on the output file. Possible " \
"values: #{syms.join(', ')}") do |perms|
- perms.each do |perm|
- unless syms.include?(perm)
+ perms.map! do |perm|
+ unless syms.include?(perm.to_sym)
raise OptionParser::InvalidArgument, "#{perm} (invalid permission name)"
end
+ perm.to_sym
end
@encryption = :add
@enc_permissions = perms
end
- @password = nil
- @pages = '1-e'
+ @files = []
+ @initial_empty = false
+ @interleave = false
+
@embed_files = []
@compact = true
+ @compress_pages = false
@object_streams = :preserve
@xref_streams = :preserve
@streams = :preserve
@encryption = :preserve
@@ -144,61 +179,126 @@
@enc_algorithm = :aes
@enc_force_v4 = false
@enc_permissions = []
end
- def execute(input_file, output_file) #:nodoc:
- @compact = true unless @pages == '1-e'
- if @enc_user_pwd && !@enc_user_pwd.empty? && (!@enc_owner_pwd || @enc_owner_pwd.empty?)
- @enc_owner_pwd = @enc_user_pwd
+ # Builds the output PDF: opens every input file, assembles the selected pages in the target
+ # document, removes page objects that ended up unused, embeds files, runs the optimize task,
+ # applies the stream mode and the encryption settings and writes the result to +output_file+.
+ #
+ # Raises OptionParser::ParseError if neither --file nor --empty was given. A HexaPDF::Error is
+ # reported on standard error and the process exits with status 1.
+ def execute(output_file) #:nodoc:
+ # Input IO objects are collected so that they can be closed once the output has been written
+ open_files = []
+ if !@initial_empty && @files.empty?
+ error = OptionParser::ParseError.new("At least one --file FILE or --empty is needed")
+ error.reason = "Missing argument"
+ raise error
end
- HexaPDF::Document.open(input_file, decryption_opts: {password: @password}) do |doc|
- arrange_pages(doc) unless @pages == '1-e'
- @embed_files.each {|file| doc.utils.add_file(file, embed: true)}
+ # Create PDF documents for each input file; a file given several times is opened only once
+ cache = {}
+ @files.each do |spec|
+ cache[spec.file] ||= begin
+ # PDF data is binary, so the file must not be opened in text mode
+ io = File.open(spec.file, 'rb')
+ open_files << io
+ HexaPDF::Document.new(io: io, decryption_opts: {password: spec.password})
+ end
+ spec.file = cache[spec.file]
+ end
- doc.task(:optimize, compact: @compact, object_streams: @object_streams,
- xref_streams: @xref_streams)
+ # Assemble pages
+ target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
+ page_tree = target.add(Type: :Pages)
+ import_pages(page_tree)
+ target.catalog[:Pages] = page_tree
- handle_streams(doc) if @streams != :preserve
-
- if @encryption == :add
- doc.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length,
- force_V4: @enc_force_v4, permissions: @enc_permissions,
- owner_password: @enc_owner_pwd, user_password: @enc_user_pwd)
- elsif @encryption == :remove
- doc.encrypt(name: nil)
+ # Remove potentially imported but unused pages and page tree nodes
+ retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
+ retained[target.pages.root.data] = true
+ target.each(current: false) do |obj|
+ next unless obj.kind_of?(HexaPDF::Dictionary)
+ if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
+ target.delete(obj)
end
+ end
- doc.write(output_file)
+ # Embed the given files
+ @embed_files.each {|file| target.files.add(file, embed: true)}
+
+ # Optimize the PDF file
+ target.task(:optimize, compact: @compact, object_streams: @object_streams,
+ xref_streams: @xref_streams, compress_pages: @compress_pages)
+
+ # Update stream filters
+ handle_streams(target) unless @streams == :preserve
+
+ # Encrypt, decrypt or do nothing
+ if @encryption == :add
+ target.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length,
+ force_V4: @enc_force_v4, permissions: @enc_permissions,
+ owner_password: @enc_owner_pwd, user_password: @enc_user_pwd)
+ elsif @encryption == :remove
+ target.encrypt(name: nil)
end
+
+ target.write(output_file)
rescue HexaPDF::Error => e
- $stderr.puts "Error while processing the PDF file: #{e.message}"
+ $stderr.puts "Processing error : #{e.message}"
exit(1)
+ ensure
+ # Also runs when the rescue clause exits, so the input files never stay open
+ open_files.each(&:close)
end
+ # Returns the argument synopsis of this command: either --file or --empty, followed by the
+ # output file.
+ # NOTE(review): presumably picked up by CmdParse when it builds the usage line - confirm.
+ def usage_arguments #:nodoc:
+ "{--file IN_FILE | --empty} OUT_FILE"
+ end
+
private
- # Arranges the pages of the document as specified with the --pages option.
- def arrange_pages(doc)
- pages = command_parser.parse_pages_specification(@pages, doc.pages.page_count)
- new_page_tree = doc.add(Type: :Pages)
- pages.each do |index, rotation|
- page = doc.pages.page(index)
- page.value.update(page.copy_inherited_values)
- if rotation == :none
- page.delete(:Rotate)
- else
- page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
+ # Imports the pages of all input files, as selected with the --pages options, into the given
+ # page tree.
+ #
+ # The page specification of each input file is first resolved into [page, rotation] pairs.
+ # Without --interleave the pages are added file by file; with --interleave the first pages of
+ # all files are added, then the second pages and so on, skipping files without further pages.
+ def import_pages(page_tree)
+ @files.each do |s|
+ page_list = s.file.pages.to_a
+ s.pages = command_parser.parse_pages_specification(s.pages, s.file.pages.count)
+ s.pages.each do |arr|
+ arr[0] = page_list[arr[0]]
+ # NOTE(review): an unspecified rotation defaults to the page's own /Rotate value, which
+ # import_page then adds to the page's /Rotate again - confirm that this is intended.
+ arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
end
- new_page_tree.add_page(page)
end
- doc.catalog[:Pages] = new_page_tree
+
+ if @interleave
+ # One list of [document, file index, page, rotation] entries per input file
+ all = @files.each_with_index.map do |spec, findex|
+ spec.pages.map {|index, rotation| [spec.file, findex, index, rotation]}
+ end
+ # Nothing to interleave when --empty is used without any input file
+ return if all.empty?
+ max_pages_per_file = all.map(&:size).max
+ first, *rest = *all
+ # Pad the first list with nil so that zip also yields the surplus pages of longer lists
+ first.fill(nil, first.size...max_pages_per_file)
+ first.zip(*rest) do |slice|
+ slice.each do |source, findex, page, rotation|
+ next unless source
+ import_page(page_tree, findex, page, rotation)
+ end
+ end
+ else
+ @files.each_with_index do |s, findex|
+ s.pages.each {|page, rotation| import_page(page_tree, findex, page, rotation)}
+ end
+ end
end
+ # Adds +page+ to +page_tree+, adjusting its /Rotate entry according to +rotation+.
+ #
+ # A page of a foreign document is imported into the page tree's document and copied. A page of
+ # the same document first gets its inherited values written into it and is only copied when it
+ # does not come from the first input file (+source_index+ 0).
+ #
+ # A +rotation+ of :none deletes the /Rotate entry, an Integer is added to the current value
+ # (modulo 360) and any other value leaves the entry alone.
+ def import_page(page_tree, source_index, page, rotation)
+ target_doc = page_tree.document
+ if target_doc != page.document
+ page = target_doc.import(page).deep_copy
+ else
+ page.value.update(page.copy_inherited_values)
+ page = page.deep_copy if source_index != 0
+ end
+ case rotation
+ when :none
+ page.delete(:Rotate)
+ when Integer
+ page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
+ end
+ target_doc.add(page)
+ page_tree.add_page(page)
+ end
+
+ # Filter names used as a lookup set (every value is true).
+ # NOTE(review): presumably handle_streams leaves streams using one of these filters untouched -
+ # the lookup itself is not visible here, confirm.
IGNORED_FILTERS = { #:nodoc:
CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true
- }
+ }.freeze
# Applies the chosen stream mode to all streams.
def handle_streams(doc)
doc.each(current: false) do |obj|
next if !obj.respond_to?(:set_filter) || obj[:Subtype] == :Image ||