lib/hexapdf/cli/modify.rb in hexapdf-0.1.0 vs lib/hexapdf/cli/modify.rb in hexapdf-0.2.0

- old
+ new

@@ -37,32 +37,62 @@ module HexaPDF module CLI # Modifies a PDF file: # - # * Decrypts or encrypts the PDF file. + # * Adds pages from other PDF files. + # * Decrypts or encrypts the resulting output PDF file. # * Generates or deletes object and cross-reference streams. - # * Optimizes a PDF by merging the revisions of a PDF file and removes unused entries. + # * Optimizes the output PDF by merging the revisions of a PDF file and removes unused entries. # # See: HexaPDF::Task::Optimize class Modify < CmdParse::Command + InputSpec = Struct.new(:file, :pages, :password) #:nodoc: + def initialize #:nodoc: super('modify', takes_commands: false) short_desc("Modify a PDF file") long_desc(<<-EOF.gsub!(/^ */, '')) - This command modifies a PDF file. It can be used to encrypt/decrypt a file, to optimize it - and remove unused entries and to generate or delete object and cross-reference streams. + This command modifies a PDF file. It can be used to select pages that should appear in + the output file and to add pages from other PDF files. The output file can be + encrypted/decrypted and optimized in various ways. + + The first input file is the primary file which gets modified, so meta data like file + information, outlines, etc. are taken from it. Alternatively, it is possible to start + with an empty PDF file by using --empty. The order of the options specifying the files + is important as they are used in that order. + + Also note that the --password and --pages options apply to the last preceeding input file. EOF - options.on("--password PASSWORD", "-p", String, - "The password for decryption. Use - for reading from standard input.") do |pwd| - @password = (pwd == '-' ? command_parser.read_password("Input file password") : pwd) + options.separator("") + options.separator("Input file(s) related options") + options.on("-f", "--file FILE", "Input file, can be specified multiple times") do |file| + @files << InputSpec.new(file, '1-e') end - options.on("--pages PAGES", "The pages to be used in the output file") do |pages| - @pages = pages + options.on("-p", "--password PASSWORD", String, "The password for decrypting the last " \ + "specified input file (use - for reading from standard input)") do |pwd| + raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty? + pwd = (pwd == '-' ? command_parser.read_password("#{@files.last.file} password") : pwd) + @files.last.password = pwd end + options.on("-i", "--pages PAGES", "The pages of the last specified input file that " \ + "should be used (default: 1-e)") do |pages| + raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty? + @files.last.pages = pages + end + options.on("-e", "--empty", "Use an empty file as the first input file") do + @initial_empty = true + end + options.on("--[no-]interleave", "Interleave the pages from the input files (default: " \ + "false)") do |c| + @interleave = c + end + + options.separator("") + options.separator("Output file related options") options.on("--embed FILE", String, "Embed the file into the output file (can be used " \ "multiple times)") do |file| @embed_files << file end options.on("--[no-]compact", "Delete unnecessary PDF objects (default: yes)") do |c| @@ -81,26 +111,27 @@ options.on("--streams MODE", [:compress, :preserve, :uncompress], "Handling of stream data (either compress, preserve or uncompress; default: " \ "preserve)") do |streams| @streams = streams end - - options.separator("") - options.separator("Encryption related options") + options.on("--[no-]compress-pages", "Recompress page content streams (may take a long " \ + "time; default: no)") do |c| + @compress_pages = c + end options.on("--decrypt", "Remove any encryption") do @encryption = :remove end options.on("--encrypt", "Encrypt the output file") do @encryption = :add end options.on("--owner-password PASSWORD", String, "The owner password to be set on the " \ - "output file. Use - for reading from standard input.") do |pwd| + "output file (use - for reading from standard input)") do |pwd| @encryption = :add @enc_owner_pwd = (pwd == '-' ? command_parser.read_password("Owner password") : pwd) end options.on("--user-password PASSWORD", String, "The user password to be set on the " \ - "output file. Use - for reading from standard input.") do |pwd| + "output file (use - for reading from standard input)") do |pwd| @encryption = :add @enc_user_pwd = (pwd == '-' ? command_parser.read_password("User password") : pwd) end options.on("--algorithm ALGORITHM", [:aes, :arc4], "The encryption algorithm: aes or arc4 (default: aes)") do |a| @@ -119,23 +150,27 @@ end syms = HexaPDF::Encryption::StandardSecurityHandler::Permissions::SYMBOL_TO_PERMISSION.keys options.on("--permissions PERMS", Array, "Comma separated list of permissions to be set on the output file. Possible " \ "values: #{syms.join(', ')}") do |perms| - perms.each do |perm| - unless syms.include?(perm) + perms.map! do |perm| + unless syms.include?(perm.to_sym) raise OptionParser::InvalidArgument, "#{perm} (invalid permission name)" end + perm.to_sym end @encryption = :add @enc_permissions = perms end - @password = nil - @pages = '1-e' + @files = [] + @initial_empty = false + @interleave = false + @embed_files = [] @compact = true + @compress_pages = false @object_streams = :preserve @xref_streams = :preserve @streams = :preserve @encryption = :preserve @@ -144,61 +179,126 @@ @enc_algorithm = :aes @enc_force_v4 = false @enc_permissions = [] end - def execute(input_file, output_file) #:nodoc: - @compact = true unless @pages == '1-e' - if @enc_user_pwd && !@enc_user_pwd.empty? && (!@enc_owner_pwd || @enc_owner_pwd.empty?) - @enc_owner_pwd = @enc_user_pwd + def execute(output_file) #:nodoc: + if !@initial_empty && @files.empty? + error = OptionParser::ParseError.new("At least one --file FILE or --empty is needed") + error.reason = "Missing argument" + raise error end - HexaPDF::Document.open(input_file, decryption_opts: {password: @password}) do |doc| - arrange_pages(doc) unless @pages == '1-e' - @embed_files.each {|file| doc.utils.add_file(file, embed: true)} + # Create PDF documents for each input file + cache = {} + @files.each do |spec| + cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file), + decryption_opts: {password: spec.password}) + spec.file = cache[spec.file] + end - doc.task(:optimize, compact: @compact, object_streams: @object_streams, - xref_streams: @xref_streams) + # Assemble pages + target = (@initial_empty ? HexaPDF::Document.new : @files.first.file) + page_tree = target.add(Type: :Pages) + import_pages(page_tree) + target.catalog[:Pages] = page_tree - handle_streams(doc) if @streams != :preserve - - if @encryption == :add - doc.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length, - force_V4: @enc_force_v4, permissions: @enc_permissions, - owner_password: @enc_owner_pwd, user_password: @enc_user_pwd) - elsif @encryption == :remove - doc.encrypt(name: nil) + # Remove potentially imported but unused pages and page tree nodes + retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true} + retained[target.pages.root.data] = true + target.each(current: false) do |obj| + next unless obj.kind_of?(HexaPDF::Dictionary) + if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data) + target.delete(obj) end + end - doc.write(output_file) + # Embed the given files + @embed_files.each {|file| target.files.add(file, embed: true)} + + # Optimize the PDF file + target.task(:optimize, compact: @compact, object_streams: @object_streams, + xref_streams: @xref_streams, compress_pages: @compress_pages) + + # Update stream filters + handle_streams(target) unless @streams == :preserve + + # Encrypt, decrypt or do nothing + if @encryption == :add + target.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length, + force_V4: @enc_force_v4, permissions: @enc_permissions, + owner_password: @enc_owner_pwd, user_password: @enc_user_pwd) + elsif @encryption == :remove + target.encrypt(name: nil) end + + target.write(output_file) rescue HexaPDF::Error => e - $stderr.puts "Error while processing the PDF file: #{e.message}" + $stderr.puts "Processing error : #{e.message}" exit(1) end + def usage_arguments #:nodoc: + "{--file IN_FILE | --empty} OUT_FILE" + end + private - # Arranges the pages of the document as specified with the --pages option. - def arrange_pages(doc) - pages = command_parser.parse_pages_specification(@pages, doc.pages.page_count) - new_page_tree = doc.add(Type: :Pages) - pages.each do |index, rotation| - page = doc.pages.page(index) - page.value.update(page.copy_inherited_values) - if rotation == :none - page.delete(:Rotate) - else - page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360 + # Imports the pages of the document as specified with the --pages option to the given page + # tree. + def import_pages(page_tree) + @files.each do |s| + page_list = s.file.pages.to_a + s.pages = command_parser.parse_pages_specification(s.pages, s.file.pages.count) + s.pages.each do |arr| + arr[0] = page_list[arr[0]] + arr[1] = arr[0].value[:Rotate] || :none unless arr[1] end - new_page_tree.add_page(page) end - doc.catalog[:Pages] = new_page_tree + + if @interleave + max_pages_per_file = 0 + all = @files.each_with_index.map do |spec, findex| + list = [] + spec.pages.each {|index, rotation| list << [spec.file, findex, index, rotation]} + max_pages_per_file = list.size if list.size > max_pages_per_file + list + end + first, *rest = *all + first[max_pages_per_file - 1] ||= nil + first.zip(*rest) do |slice| + slice.each do |source, findex, page, rotation| + next unless source + import_page(page_tree, findex, page, rotation) + end + end + else + @files.each_with_index do |s, findex| + s.pages.each {|page, rotation| import_page(page_tree, findex, page, rotation)} + end + end end + # Import the page with the given +rotation+ into the page tree. + def import_page(page_tree, source_index, page, rotation) + if page_tree.document == page.document + page.value.update(page.copy_inherited_values) + page = page.deep_copy unless source_index == 0 + else + page = page_tree.document.import(page).deep_copy + end + if rotation == :none + page.delete(:Rotate) + elsif rotation.kind_of?(Integer) + page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360 + end + page_tree.document.add(page) + page_tree.add_page(page) + end + IGNORED_FILTERS = { #:nodoc: CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true - } + }.freeze # Applies the chosen stream mode to all streams. def handle_streams(doc) doc.each(current: false) do |obj| next if !obj.respond_to?(:set_filter) || obj[:Subtype] == :Image ||