# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2024 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
#++
require 'hexapdf/cli/command'
module HexaPDF
module CLI
# Outputs various bits of information about PDF files:
#
# * The entries in the trailer's /Info dictionary
# * Encryption information from the trailer's /Encrypt dictionary
# * The number of pages
# * The used PDF version
#
# See: HexaPDF::Type::Info, HexaPDF::Encryption::SecurityHandler
class Info < Command
# Creates the +info+ command: sets the default state, the short and long descriptions and
# registers the --check and --password command line options.
def initialize #:nodoc:
  super('info', takes_commands: false)

  # Default state; the blocks registered via options.on below only assign these variables
  # and are stored as callbacks, so setting the defaults first is equivalent.
  @password = nil
  @auto_decrypt = true
  @check_file = false

  short_desc("Show document information")
  long_desc(<<~EOF)
    This command extracts information from the Info dictionary of a PDF file as well
    as some other useful information like the used PDF version and encryption information.
    If the --check option is specified, the PDF file will also be checked for parse and
    validation errors. And if the process doesn't abort, HexaPDF is still able to handle the
    file by correcting the errors.
  EOF

  options.on("--check", "-c", "Check the PDF file for parse errors and validity") do |flag|
    @check_file = flag
  end
  options.on("--password PASSWORD", "-p", String,
             "The password for decryption. Use - for reading from standard input.") do |value|
    # A single dash means that the password is obtained through #read_password.
    @password = if value == '-'
                  read_password
                else
                  value
                end
  end
end
# Runs the command for the PDF +file+ by delegating to #output_info and returns whatever
# that method returns.
def execute(file) #:nodoc:
output_info(file)
end
private
# Names of the entries of the trailer's /Info dictionary that #output_info prints, in the
# order in which they are printed.
INFO_KEYS = %i[Title Author Subject Keywords Creator Producer CreationDate ModDate].freeze #:nodoc:
# Width to which #output_line pads the "header:" part of each output line.
COLUMN_WIDTH = 20 #:nodoc:
# Prints the available information about the PDF +file+ to standard output.
#
# The document is opened with the options returned by #pdf_options. If the --check option
# was given, parse/validation problems are reported first. Afterwards the file name and
# size, the /Info entries, the encryption, linearization and signature details, the page
# count and the PDF version are printed, in this order.
#
# If a HexaPDF::EncryptionError is raised while automatic decryption is enabled, the whole
# method is retried once with automatic decryption disabled; if it is raised again, the
# error is propagated. A HexaPDF::MalformedPDFError is reported on standard error instead
# of being propagated.
def output_info(file) # :nodoc:
  # Not named +options+ so that the command's option parser accessor is not shadowed.
  open_options = pdf_options(@password)
  open_options[:config]['document.auto_decrypt'] = @auto_decrypt

  HexaPDF::Document.open(file, **open_options) do |doc|
    report_validation_problems(doc) if @check_file

    output_line("File name", file)
    output_line("File size", "#{File.stat(file).size} bytes")
    # Without automatic decryption the /Info entries are not shown.
    output_trailer_info(doc) if @auto_decrypt
    output_encryption_info(doc) if doc.encrypted?
    output_line("Linearized", "yes") if doc.revisions.parser.linearized?
    output_signature_info(doc)
    output_line("Pages", doc.pages.count.to_s)
    output_line("Version", doc.version)
    if doc.revisions.parser.reconstructed?
      output_line("Reconstructed", "yes (use --check for details)")
    end
  end
rescue HexaPDF::EncryptionError
  # The retry is bounded: it only happens once because the flag is cleared before retrying.
  raise unless @auto_decrypt
  @auto_decrypt = false
  retry
rescue HexaPDF::MalformedPDFError => e
  $stderr.puts "Error: PDF file #{file} is damaged and cannot be recovered"
  $stderr.puts " #{e}"
end

# Validates the trailer and all objects of +doc+ (with auto-correction enabled), printing a
# warning for every reported validation problem and an error for every unreadable stream.
def report_validation_problems(doc) #:nodoc:
  # The object yielded by Document#each that is currently being validated; nil while the
  # trailer is validated.
  enclosing_object = nil

  validation_block = lambda do |msg, correctable, object|
    # Problems may be reported for objects that are neither indirect nor the trailer. Such
    # objects are described through the object that contains them. This has to be
    # remembered before +object+ is replaced: comparing with the enclosing object
    # afterwards cannot tell a sub-object from the enclosing object itself.
    nested = !(object.indirect? || object.type == :XXTrailer)
    # While the trailer is validated there is no enclosing indirect object, so fall back
    # to the trailer itself instead of calling methods on nil.
    object = (enclosing_object || doc.trailer) if nested

    description = if object.type == :XXTrailer
                    'trailer'
                  elsif object.type.to_s.start_with?("XX")
                    "object (#{object.oid},#{object.gen})"
                  else
                    "object type #{object.type} (#{object.oid},#{object.gen})"
                  end
    description = "sub-object of #{description}" if nested
    puts "WARNING: Validation error for #{description}: #{msg} " \
         "#{correctable ? '(correctable)' : ''}"
  end

  doc.trailer.validate(auto_correct: true, &validation_block)
  doc.each(only_loaded: false) do |obj|
    enclosing_object = obj
    obj.validate(auto_correct: true, &validation_block)
    next unless obj.data.stream

    # Reading the stream is only done to find out whether doing so raises an error.
    begin
      obj.stream
    rescue StandardError => e
      puts "ERROR: Stream of object (#{obj.oid},#{obj.gen}) invalid: #{e.message}"
    end
  end
end

# Prints the entries of the /Info dictionary of +doc+ that are listed in INFO_KEYS,
# skipping missing values and empty strings.
def output_trailer_info(doc) #:nodoc:
  info = doc.trailer.info
  INFO_KEYS.each do |name|
    value = info[name]
    next if !value || (value.kind_of?(String) && value.empty?)
    output_line(name.to_s, value.to_s)
  end
end

# Prints the encryption details of the encrypted document +doc+, or only a short note if
# automatic decryption has been disabled.
def output_encryption_info(doc) #:nodoc:
  unless @auto_decrypt
    output_line("Encrypted", "yes (no or wrong password given)")
    return
  end

  handler = doc.security_handler
  details = handler.encryption_details
  summary = "yes (version: #{details[:version]}, key length: #{details[:key_length]}bits)"
  output_line("Encrypted", summary)
  output_line(" Used Password", handler.decryption_password_type)
  output_line(" String algorithm", details[:string_algorithm].to_s)
  output_line(" Stream algorithm", details[:stream_algorithm].to_s)
  output_line(" EFF algorithm", details[:embedded_file_algorithm].to_s)
  # Only printed for security handlers that provide a permissions list.
  output_line(" Permissions", handler.permissions.join(", ")) if handler.respond_to?(:permissions)
end

# Prints the number of signatures of +doc+ and the details as well as the verification
# messages of each one; prints nothing if the document has no signatures.
def output_signature_info(doc) #:nodoc:
  signatures = doc.signatures.to_a
  return if signatures.empty?

  count = signatures.size
  output_line("Document signed", "yes - #{count} signature#{count > 1 ? 's' : ''}")
  signatures.each do |signature|
    output_line(" Signer", signature.signer_name)
    output_line(" Signing time", signature.signing_time)
    if (reason = signature.signing_reason)
      output_line(" Reason", reason)
    end
    if (location = signature.signing_location)
      output_line(" Location", location)
    end
    output_line(" Signature type", signature.signature_type)
    signature.verify(allow_self_signed: true).messages.sort.each do |msg|
      output_line(" #{msg.type.capitalize}", msg.content)
    end
  end
end
# Returns the options used for opening the PDF file with the given +password+.
#
# Without the --check option the superclass implementation is used unchanged. With it,
# custom options are built: xref reconstruction is enabled and every correctable parse
# error is printed as a warning, with the callback returning +false+. Additionally, two
# filter related entries of HexaPDF::GlobalConfiguration are changed.
def pdf_options(password)
  return super unless @check_file

  HexaPDF::GlobalConfiguration['filter.predictor.strict'] = true
  HexaPDF::GlobalConfiguration['filter.flate.on_error'] = proc { true }

  report_parse_error = lambda do |_, msg, pos|
    puts "WARNING: Parse error at position #{pos}: #{msg}"
    false
  end

  {
    decryption_opts: {password: password},
    config: {
      'parser.try_xref_reconstruction' => true,
      'parser.on_correctable_error' => report_parse_error,
    },
  }
end
# Prints one line consisting of "header:", left-justified to COLUMN_WIDTH characters,
# followed by the string representation of +text+.
def output_line(header, text) #:nodoc:
  label = "#{header}:".ljust(COLUMN_WIDTH)
  puts(label + text.to_s)
end
end
end
end