# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2024 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see .
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at .
#++

require 'io/console'
require 'ostruct'
require 'stringio'
require 'cmdparse'
require 'hexapdf/document'
require 'hexapdf/font/true_type'

module HexaPDF
  module CLI

    # Raised when problems occur on the CLI side of things.
    class Error < HexaPDF::Error; end

    # Base class for all hexapdf commands. It provides utility methods needed by the individual
    # commands.
    class Command < CmdParse::Command

      module Extensions #:nodoc:

        def help_banner #:nodoc:
          "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
            "Copyright (c) 2014-2024 Thomas Leitner; licensed under the AGPLv3\n\n" \
            "#{format(usage, indent: 7)}\n\n"
        end

        def help #:nodoc:
          super << format("See https://hexapdf.gettalong.org/documentation/hexapdf.1.html " \
                          "for the full manual page with examples.", indent: 0)
        end

      end

      include Extensions

      # Forwards all arguments to the superclass and initializes the output options with their
      # default values (see #define_optimization_options and #define_encryption_options for the
      # command line switches that change them).
      def initialize(*args, **kwargs, &block) #:nodoc:
        super
        @out_options = OpenStruct.new
        @out_options.compact = true
        @out_options.compress_pages = false
        @out_options.object_streams = :preserve
        @out_options.xref_streams = :preserve
        @out_options.streams = :preserve
        @out_options.optimize_fonts = false
        @out_options.prune_page_resources = false

        @out_options.encryption = :preserve
        @out_options.enc_user_pwd = @out_options.enc_owner_pwd = nil
        @out_options.enc_key_length = 128
        @out_options.enc_algorithm = :aes
        @out_options.enc_force_v4 = false
        @out_options.enc_permissions = []
      end

      protected

      # Creates a HexaPDF::Document instance for the PDF file and yields it.
      #
      # If +out_file+ is given, the document is written to it after yielding.
      #
      # When +file+ and +out_file+ are the same, the document is created via
      # HexaPDF::Document.open instead of from an IO object that is kept open by this method.
      # An IO object opened by this method is always closed again, even if an error is raised.
      def with_document(file, password: nil, out_file: nil, incremental: false) #:yield: document
        if file == out_file
          doc = HexaPDF::Document.open(file, **pdf_options(password))
        else
          file_io = File.open(file, 'rb')
          doc = HexaPDF::Document.new(io: file_io, **pdf_options(password))
        end

        yield(doc)

        write_document(doc, out_file, incremental: incremental)
      ensure
        file_io&.close
      end

      # Returns a hash with HexaPDF::Document options based on the given password and the option
      # switches.
      #
      # Note that this method also sets the 'filter.predictor.strict' and 'filter.flate.on_error'
      # options of HexaPDF::GlobalConfiguration according to the strict setting of the command
      # parser.
      #
      # In strict mode the error callbacks just return +true+; otherwise they return +false+ after
      # printing a message to the standard error output if the verbosity level permits it.
      def pdf_options(password)
        hash = {decryption_opts: {password: password}, config: {}}
        HexaPDF::GlobalConfiguration['filter.predictor.strict'] = command_parser.strict
        HexaPDF::GlobalConfiguration['filter.flate.on_error'] =
          if command_parser.strict
            proc { true }
          else
            proc do |_, error|
              if command_parser.verbosity_info?
                $stderr.puts "Ignoring error in flate encoded stream: #{error}"
              end
              false
            end
          end
        hash[:config]['parser.try_xref_reconstruction'] = !command_parser.strict
        hash[:config]['parser.on_correctable_error'] =
          if command_parser.strict
            proc { true }
          else
            proc do |_, msg, pos|
              if command_parser.verbosity_info?
                msg = MalformedPDFError.new(msg, pos: pos).message
                $stderr.puts "Corrected parsing problem: #{msg}"
              end
              false
            end
          end
        hash
      end

      # Writes the document to the given file or does nothing if +out_file+ is +nil+.
      #
      # Before writing, the trailer ID is updated and the document is validated with
      # auto-correction enabled. A validation problem that is not correctable raises an Error in
      # strict mode; otherwise validation problems are only reported on the standard error output
      # if the verbosity level permits it.
      def write_document(doc, out_file, incremental: false)
        if out_file
          doc.trailer.update_id
          doc.validate(auto_correct: true) do |msg, correctable, object|
            if command_parser.strict && !correctable
              raise Error, "Validation error for object (#{object.oid},#{object.gen}): #{msg}"
            elsif command_parser.verbosity_info?
              $stderr.puts "#{correctable ? 'Corrected' : 'Ignored'} validation problem " \
                "for object (#{object.oid},#{object.gen}): #{msg}"
            end
          end
          if command_parser.verbosity_info?
            puts "Creating output document #{out_file}"
          end
          # The document has already been validated above, so don't do it again when writing.
          doc.write(out_file, validate: false, incremental: incremental)
        end
      end

      # Checks whether the given output file exists and raises an error if it does and
      # HexaPDF::CLI#force is not set.
      def maybe_raise_on_existing_file(filename)
        if !command_parser.force && File.exist?(filename)
          raise Error, "Output file '#{filename}' already exists, not overwriting. Use --force to " \
            "force writing"
        end
      end

      # Defines the optimization options.
      #
      # See: #out_options, #apply_optimization_options
      def define_optimization_options
        options.separator("")
        options.separator("Optimization options:")
        options.on("--[no-]compact", "Delete unnecessary PDF objects (default: " \
                   "#{@out_options.compact})") do |c|
          @out_options.compact = c
        end
        options.on("--object-streams MODE", [:generate, :preserve, :delete],
                   "Handling of object streams (either generate, preserve or delete; " \
                     "default: #{@out_options.object_streams})") do |os|
          @out_options.object_streams = os
        end
        options.on("--xref-streams MODE", [:generate, :preserve, :delete],
                   "Handling of cross-reference streams (either generate, preserve or delete; " \
                     "default: #{@out_options.xref_streams})") do |x|
          @out_options.xref_streams = x
        end
        options.on("--streams MODE", [:compress, :preserve, :uncompress],
                   "Handling of stream data (either compress, preserve or uncompress; default: " \
                     "#{@out_options.streams})") do |streams|
          @out_options.streams = streams
        end
        options.on("--[no-]compress-pages", "Recompress page content streams (may take a long " \
                   "time; default: #{@out_options.compress_pages})") do |c|
          @out_options.compress_pages = c
        end
        options.on("--[no-]prune-page-resources", "Prunes unused objects from the page resources " \
                   "(may take a long time; default: #{@out_options.prune_page_resources})") do |c|
          @out_options.prune_page_resources = c
        end
        options.on("--[no-]optimize-fonts", "Optimize embedded font files " \
                   "(default: #{@out_options.optimize_fonts})") do |o|
          @out_options.optimize_fonts = o
        end
      end

      # Defines the encryption options.
      #
      # All switches except --decrypt set the encryption mode to :add, i.e. specifying any
      # encryption detail implies that the output file gets encrypted.
      #
      # See: #out_options, #apply_encryption_options
      def define_encryption_options
        options.separator("")
        options.separator("Encryption options:")
        options.on("--decrypt", "Remove any encryption") do
          @out_options.encryption = :remove
        end
        options.on("--encrypt", "Encrypt the output file") do
          @out_options.encryption = :add
        end
        options.on("--owner-password PASSWORD", String, "The owner password to be set on the " \
                   "output file (use - for reading from standard input)") do |pwd|
          @out_options.encryption = :add
          @out_options.enc_owner_pwd = (pwd == '-' ? read_password("Owner password") : pwd)
        end
        options.on("--user-password PASSWORD", String, "The user password to be set on the " \
                   "output file (use - for reading from standard input)") do |pwd|
          @out_options.encryption = :add
          @out_options.enc_user_pwd = (pwd == '-' ? read_password("User password") : pwd)
        end
        options.on("--algorithm ALGORITHM", [:aes, :arc4],
                   "The encryption algorithm: aes or arc4 (default: " \
                     "#{@out_options.enc_algorithm})") do |a|
          @out_options.encryption = :add
          @out_options.enc_algorithm = a
        end
        options.on("--key-length BITS", Integer,
                   "The encryption key length in bits (default: " \
                     "#{@out_options.enc_key_length})") do |i|
          @out_options.encryption = :add
          @out_options.enc_key_length = i
        end
        options.on("--force-V4",
                   "Force use of encryption version 4 if key length=128 and algorithm=arc4") do
          @out_options.encryption = :add
          @out_options.enc_force_v4 = true
        end
        syms = HexaPDF::Encryption::StandardSecurityHandler::Permissions::SYMBOL_TO_PERMISSION.keys
        options.on("--permissions PERMS", Array,
                   "Comma separated list of permissions to be set on the output file. Possible " \
                     "values: #{syms.join(', ')}") do |perms|
          perms.map! do |perm|
            unless syms.include?(perm.to_sym)
              raise OptionParser::InvalidArgument, "#{perm} (invalid permission name)"
            end
            perm.to_sym
          end
          @out_options.encryption = :add
          @out_options.enc_permissions = perms
        end
      end

      # Applies the optimization options to the given HexaPDF::Document instance.
      #
      # The objects of the document are only iterated over if the stream mode is not :preserve or
      # if fonts should be optimized.
      #
      # See: #define_optimization_options
      def apply_optimization_options(doc)
        doc.task(:optimize, compact: @out_options.compact,
                 object_streams: @out_options.object_streams,
                 xref_streams: @out_options.xref_streams,
                 compress_pages: @out_options.compress_pages,
                 prune_page_resources: @out_options.prune_page_resources)
        if @out_options.streams != :preserve || @out_options.optimize_fonts
          doc.each do |obj|
            optimize_stream(obj)
            optimize_font(obj)
          end
        end
      end

      # Streams using one of these filters are left untouched by #optimize_stream.
      IGNORED_FILTERS = { #:nodoc:
        CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true
      }.freeze

      # Applies the chosen stream mode to the given object.
      #
      # Nothing is done if the stream mode is :preserve, if the object doesn't respond to
      # +set_filter+ or if one of its filters is listed in IGNORED_FILTERS.
      def optimize_stream(obj)
        return if @out_options.streams == :preserve || !obj.respond_to?(:set_filter) ||
          Array(obj[:Filter]).any? {|f| IGNORED_FILTERS[f] }

        obj.set_filter(@out_options.streams == :compress ? :FlateDecode : nil)
      end

      # Optimize the object if it is a font object.
      #
      # Only embedded fonts with subtype TrueType, or with subtype Type0 and a descendant font of
      # subtype CIDFontType2, are processed. Errors during the optimization never abort the
      # command; they are reported on the standard error output if the verbosity level permits it.
      def optimize_font(obj)
        return unless @out_options.optimize_fonts && obj.kind_of?(HexaPDF::Type::Font) &&
          (obj[:Subtype] == :TrueType ||
           (obj[:Subtype] == :Type0 && obj.descendant_font[:Subtype] == :CIDFontType2)) &&
          obj.embedded?

        font = HexaPDF::Font::TrueType::Font.new(StringIO.new(obj.font_file.stream))
        data = HexaPDF::Font::TrueType::Optimizer.build_for_pdf(font)
        obj.font_file.stream = data
        obj.font_file[:Length1] = data.size
      rescue StandardError => e
        if command_parser.verbosity_info?
          $stderr.puts "Error optimizing font object (#{obj.oid},#{obj.gen}): #{e.message}"
        end
      end

      # Applies the encryption related options to the given HexaPDF::Document instance.
      #
      # Nothing is done if the encryption mode is neither :add nor :remove (the default mode is
      # :preserve).
      #
      # See: #define_encryption_options
      def apply_encryption_options(doc)
        case @out_options.encryption
        when :add
          doc.encrypt(algorithm: @out_options.enc_algorithm,
                      key_length: @out_options.enc_key_length,
                      force_v4: @out_options.enc_force_v4,
                      permissions: @out_options.enc_permissions,
                      owner_password: @out_options.enc_owner_pwd,
                      user_password: @out_options.enc_user_pwd)
        when :remove
          doc.encrypt(name: nil)
        end
      end

      # Regexp source for a single page number: a number, a number prefixed with 'r' (counted
      # from the end of the document) or 'e' (the last page).
      PAGE_NUMBER_SPEC = "(r?[1-9]\\d*|e)" #:nodoc:

      # Maps a string matched by PAGE_NUMBER_SPEC to a one-based page number, given the total
      # number of pages. The result may lie outside of 1..count.
      PAGE_MAP = lambda do |result, count| #:nodoc:
        if result == 'e'
          count
        elsif result.start_with?('r')
          count - result[1..-1].to_i + 1
        else
          result.to_i
        end
      end

      ROTATE_MAP = {'l' => 90, 'r' => -90, 'd' => 180, 'n' => :none}.freeze #:nodoc:

      # Parses the pages specification string and returns an array of tuples containing a page
      # number and a rotation value (either -90, 90, 180, :none or +nil+ where an integer means
      # adding a rotation by that number of degrees, :none means removing any set rotation value and
      # +nil+ means preserving the set rotation value).
      #
      # The returned page numbers are zero-based indices and always lie inside the document: A
      # single page outside of the document is ignored and the end points of a page range are
      # clamped to the first and last page. This also applies to reverse page numbers like 'r5'
      # that point before the first page.
      #
      # The parameter +count+ needs to be the total number of pages in the document.
      #
      # Raises OptionParser::InvalidArgument if a part of the specification is malformed.
      #
      # For details on the pages specification see the hexapdf(1) manual page.
      def parse_pages_specification(range, count)
        range.split(',').each_with_object([]) do |str, arr|
          case str
          when /\A#{PAGE_NUMBER_SPEC}(l|r|d|n)?\z/o
            page_num = PAGE_MAP[$1, count]
            # A negative or zero page number would otherwise end up as negative index and thus
            # address a page counted from the end of the document.
            next unless page_num.between?(1, count)
            arr << [page_num - 1, ROTATE_MAP[$2]]
          when /\A#{PAGE_NUMBER_SPEC}-#{PAGE_NUMBER_SPEC}(?:\/([1-9]\d*))?(l|r|d|n)?\z/o
            first_spec, last_spec, step_spec, rotation_spec = $1, $2, $3, $4
            next if count < 1 # nothing can be selected from a document without pages
            start_nr = [[PAGE_MAP[first_spec, count], 1].max, count].min - 1
            end_nr = [[PAGE_MAP[last_spec, count], 1].max, count].min - 1
            # Iterate backwards if the range is specified in reverse order.
            step = (step_spec ? step_spec.to_i : 1) * (start_nr > end_nr ? -1 : 1)
            rotation = ROTATE_MAP[rotation_spec]
            start_nr.step(to: end_nr, by: step) {|n| arr << [n, rotation] }
          else
            raise OptionParser::InvalidArgument, "invalid page range format: #{str.inspect}"
          end
        end
      end

      # Reads a password from the standard input and falls back to the console if needed.
      #
      # The optional argument +prompt+ can be used to customize the prompt when reading from the
      # console.
      def read_password(prompt = "Password")
        if $stdin.tty?
          read_from_console(prompt)
        else
          ($stdin.gets || read_from_console(prompt)).chomp
        end
      end

      # Removes unused pages and page tree nodes from the document.
      #
      # All objects of type Page or Pages that are neither the root of the page tree nor yielded
      # when iterating over the pages of the document are deleted.
      def remove_unused_pages(doc)
        retained = doc.pages.each_with_object({}) {|page, h| h[page.data] = true }
        retained[doc.pages.root.data] = true
        doc.each do |obj|
          next unless obj.kind_of?(HexaPDF::Dictionary)
          if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
            doc.delete(obj)
          end
        end
      end

      # Returns the human readable file size.
      #
      # Sizes below 10,000 bytes are shown in bytes, sizes below 1,000,000 bytes in kibibytes
      # (suffix K) and everything else in mebibytes (suffix M), the latter two rounded to one
      # decimal place.
      def human_readable_file_size(size)
        case size
        when 0..9999 then "#{size}B"
        when 10_000..999_999 then "#{(size / 1024.to_f).round(1)}K"
        else "#{(size.to_f / 1024 / 1024).round(1)}M"
        end
      end

      private

      # Displays the given prompt, reads from the console without echo and returns the read string.
      #
      # Raises an Error if no console is available. If the end of the input is reached without
      # reading anything, an empty string is returned.
      def read_from_console(prompt)
        console = IO.console
        raise Error, "No console available for reading the password" unless console

        console.write("#{prompt}: ")
        str = console.noecho {|io| io.gets.to_s.chomp }
        puts # the newline entered by the user is not echoed
        str
      end

    end

  end
end