# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2021 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
#++
require 'digest/md5'
require 'hexapdf/error'
require 'hexapdf/dictionary'
require 'hexapdf/stream'
module HexaPDF
module Encryption
# Base class for all encryption dictionaries.
#
# Contains entries common to all encryption dictionaries. If a specific security handler
# needs further fields it should derive a new subclass and add the new fields there.
#
# See: PDF1.7 s7.6.1
class EncryptionDictionary < Dictionary

  define_field :Filter,    type: Symbol, required: true
  define_field :SubFilter, type: Symbol, version: '1.3'
  define_field :V,         type: Integer, required: true
  # The key length in bits. The field name must be spelled /Length because #perform_validation
  # and the security handler read it under exactly this name.
  define_field :Length,    type: Integer, default: 40, version: '1.4'
  define_field :CF,        type: Dictionary, version: '1.5'
  define_field :StmF,      type: Symbol, default: :Identity, version: '1.5'
  define_field :StrF,      type: Symbol, default: :Identity, version: '1.5'
  define_field :EFF,       type: Symbol, version: '1.6'

  # Returns +true+ because some PDF readers stumble when encountering a non-indirect encryption
  # dictionary.
  def must_be_indirect?
    true
  end

  private

  # Ensures that the encryption dictionary's content is valid.
  #
  # In addition to the checks done by the superclass, the following problems are reported by
  # yielding a message together with +false+:
  #
  # * The value of /V is not one of 1, 2, 4 or 5. No further checks are done in this case.
  # * /V is 2 and /Length is missing, smaller than 40, larger than 128 or not divisible by 8.
  def perform_validation
    super
    unless [1, 2, 4, 5].include?(value[:V])
      yield("Value of /V is not one of 1, 2, 4 or 5", false)
      return
    end
    if value[:V] == 2 && (!key?(:Length) || value[:Length] < 40 ||
        value[:Length] > 128 || value[:Length] % 8 != 0)
      yield("Invalid value for /Length field when /V is 2", false)
    end
  end

end
# Base class for all security handlers.
#
# == Creating SecurityHandler Instances
#
# The base class provides two class methods for this:
#
# * The method ::set_up_encryption is used when a security handler instance should be created
# that populates the document's encryption dictionary.
#
# * The method ::set_up_decryption is used when a security handler should be created from the
# document's encryption dictionary.
#
# Security handlers could also be created with the ::new method but this is discouraged because
# the above methods provide the correct handling in both cases.
#
#
# == Using SecurityHandler Instances
#
# The SecurityHandler base class provides the methods for decrypting an indirect object and for
# encrypting strings and streams:
#
# * #decrypt
# * #encrypt_string
# * #encrypt_stream
#
# How the decryption/encryption key is actually computed is deferred to a sub class.
#
# Additionally, the #encryption_key_valid? method can be used to check whether the
# SecurityHandler instance is built from/built for the current version of the encryption
# dictionary.
#
#
# == Implementing a SecurityHandler Class
#
# Each security handler has to implement the following methods:
#
# prepare_encryption(**options)::
# Prepares the security handler for use in encrypting the document.
#
# See the #set_up_encryption documentation for information on which options are passed on to
# this method.
#
# Returns the encryption key as well as the names of the string, stream and embedded file
# algorithms.
#
# prepare_decryption(**options)::
# Prepares the security handler for decryption by using the information from the document's
# encryption dictionary as well as the provided arguments.
#
# See the #set_up_decryption documentation for additional information.
#
# Returns the encryption key that should be used for decryption.
#
# Additionally, the following methods can be overridden to provide a more specific meaning:
#
# encryption_dictionary_class::
# Returns the class that is used for the encryption dictionary. Should be derived from the
# EncryptionDictionary class.
class SecurityHandler
# Provides additional encryption specific information for HexaPDF::StreamData objects.
class EncryptedStreamData < StreamData

  # The encryption key and the encryption algorithm used for decrypting the stream data.
  attr_reader :key, :algorithm

  # Creates a new encrypted stream data object by copying every instance variable of the
  # template stream data object +obj+. The arguments +key+ and +algorithm+ are stored for later
  # use when decrypting.
  def initialize(obj, key, algorithm)
    obj.instance_variables.each do |ivar|
      instance_variable_set(ivar, obj.instance_variable_get(ivar))
    end
    @key = key
    @algorithm = algorithm
  end

  # Keeps the inherited, non-decrypting implementation reachable under a separate name.
  alias_method :undecrypted_fiber, :fiber

  # Returns a fiber like HexaPDF::StreamData#fiber, but one wrapped in a decrypting fiber.
  def fiber(*args)
    @algorithm.decryption_fiber(@key, super(*args))
  end

end
# :call-seq:
# SecurityHandler.set_up_encryption(document, handler_name, **options) -> handler
#
# Sets up and returns the security handler with the specified name for the document and
# modifies the document's encryption dictionary accordingly.
#
# The +options+ can contain any encryption options for the specific security handler
# and the common encryption options.
#
# See: #set_up_encryption (for the common encryption options).
def self.set_up_encryption(document, handler_name, **options)
  config = document.config
  # The filter map is consulted first; the sub filter map only serves as fallback.
  handler_class = config.constantize('encryption.filter_map', handler_name) do
    config.constantize('encryption.sub_filter_map', handler_name) do
      raise HexaPDF::EncryptionError, "Could not find the specified security handler"
    end
  end

  security_handler = handler_class.new(document)
  document.trailer[:Encrypt] = security_handler.set_up_encryption(**options)
  # The fully set up handler is returned frozen.
  security_handler.freeze
end
# :call-seq:
# SecurityHandler.set_up_decryption(document, **options) -> handler
#
# Sets up and returns the security handler that is used for decrypting the given document and
# modifies the document's object loader so that the decryption is handled automatically behind
# the scenes.
#
# The +options+ have to contain the decryption options specific to the security handler
# that is used by the PDF file.
#
# See: #set_up_decryption
def self.set_up_decryption(document, **options)
  encrypt_dict = document.trailer[:Encrypt]
  raise HexaPDF::EncryptionError, "No /Encrypt dictionary found" if encrypt_dict.nil?

  config = document.config
  # The handler is looked up by /Filter first and by /SubFilter as fallback.
  handler_class = config.constantize('encryption.filter_map', encrypt_dict[:Filter]) do
    config.constantize('encryption.sub_filter_map', encrypt_dict[:SubFilter]) do
      raise HexaPDF::EncryptionError, "Could not find a suitable security handler"
    end
  end

  security_handler = handler_class.new(document)
  document.trailer[:Encrypt] = security_handler.set_up_decryption(encrypt_dict, **options)

  # Wrap the loader of every revision so that loaded objects are decrypted on the fly. Objects
  # belonging to compressed cross-reference entries are handed back exactly as loaded.
  document.revisions.each do |revision|
    original_loader = revision.loader
    revision.loader = lambda do |xref_entry|
      loaded = original_loader.call(xref_entry)
      xref_entry.compressed? ? loaded : security_handler.decrypt(loaded)
    end
  end

  security_handler.freeze
end
# A hash containing information about the used encryption. This information is only
# available once the security handler has been set up for decryption or encryption.
#
# Available keys:
#
# :version::
# The version of the security handler in use.
# :string_algorithm::
# The algorithm used for encrypting/decrypting strings.
# :stream_algorithm::
# The algorithm used for encrypting/decrypting streams.
# :embedded_file_algorithm::
# The algorithm used for encrypting/decrypting embedded files.
# :key_length::
# The key length in bits.
attr_reader :encryption_details
# Creates a new SecurityHandler for the given document.
def initialize(document)
@document = document
# Assigned by #set_up_encryption and #set_up_decryption; compared against the hash of the
# document's current encryption dictionary in #encryption_key_valid?.
@encrypt_dict_hash = nil
# Replaced by #set_up_security_handler with the actual details.
@encryption_details = {}
end
# Checks if the encryption key computed by this security handler is derived from the
# document's encryption dictionary.
def encryption_key_valid?
  # Compare the hash of the document's current encryption dictionary with the one remembered
  # when this handler was set up.
  current_dict = document.unwrap(document.trailer[:Encrypt])
  current_dict.hash == @encrypt_dict_hash
end
# Decrypts the strings and the possibly attached stream of the given indirect object in
# place.
#
# See: PDF1.7 s7.6.2
def decrypt(obj)
  # The encryption dictionary itself and cross-reference streams are returned untouched.
  return obj if obj == document.trailer[:Encrypt]
  return obj if obj.type == :XRef

  string_key = object_key(obj.oid, obj.gen, string_algorithm)
  each_string_in_object(obj.value) do |str|
    next if str.empty?
    # The /Contents string of a /Sig object is skipped.
    next if obj.type == :Sig && obj[:Contents].equal?(str)
    str.replace(string_algorithm.decrypt(string_key, str))
  end

  # Streams whose first filter is /Crypt are not wrapped here.
  if obj.kind_of?(HexaPDF::Stream) && obj.raw_stream.filter[0] != :Crypt
    # The key only needs to be recomputed if streams use a different algorithm than strings.
    stream_key =
      if string_algorithm == stream_algorithm
        string_key
      else
        object_key(obj.oid, obj.gen, stream_algorithm)
      end
    obj.data.stream = EncryptedStreamData.new(obj.raw_stream, stream_key, stream_algorithm)
  end

  obj
end
# Returns the encrypted version of the string that resides in the given indirect object.
#
# See: PDF1.7 s7.6.2
def encrypt_string(str, obj)
  # Cases in which the string is handed back as is: empty strings, strings of the encryption
  # dictionary or of a cross-reference stream, and the /Contents string of a /Sig object.
  return str if str.empty?
  return str if obj == document.trailer[:Encrypt] || obj.type == :XRef
  return str if obj.type == :Sig && obj[:Contents].equal?(str)

  key = object_key(obj.oid, obj.gen, string_algorithm)
  string_algorithm.encrypt(key, str)
end
# Returns a Fiber that encrypts the contents of the given stream object.
def encrypt_stream(obj)
  return obj.stream_encoder if obj.type == :XRef

  key = object_key(obj.oid, obj.gen, stream_algorithm)
  source = obj.stream_source
  encoded = obj.stream_encoder(source)

  # If the encoder hands back the source itself and the raw stream data was wrapped with the
  # very same key and algorithm, the still encrypted data can be used directly.
  if encoded == source && obj.raw_stream.kind_of?(EncryptedStreamData) &&
      obj.raw_stream.key == key && obj.raw_stream.algorithm == stream_algorithm
    return obj.raw_stream.undecrypted_fiber
  end

  # Streams with /Crypt as (first) filter are not encrypted by this method.
  filter = obj[:Filter]
  crypt_filter_first = filter == :Crypt || (filter.kind_of?(PDFArray) && filter[0] == :Crypt)
  crypt_filter_first ? encoded : stream_algorithm.encryption_fiber(key, encoded)
end
# Computes the encryption key and sets up the algorithms for encrypting the document based on
# the given options, and returns the corresponding encryption dictionary.
#
# The security handler specific +options+ as well as the +algorithm+ argument are passed on to
# the #prepare_encryption method.
#
# Options for all security handlers:
#
# key_length::
# The key length in bits. Possible values are in the range of 40 to 128 and 256 and it
# needs to be divisible by 8.
#
# algorithm::
# The encryption algorithm. Possible values are :arc4 for ARC4 encryption with key lengths
# of 40 to 128 bit or :aes for AES encryption with key lengths of 128 or 256 bit.
#
# force_v4::
# Forces the use of protocol version 4 when key_length=128 and algorithm=:arc4.
#
# See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
def set_up_encryption(key_length: 128, algorithm: :aes, force_v4: false, **options)
  @dict = document.wrap({}, type: encryption_dictionary_class)

  # Derive the value of /V from the requested key length (and, for 128 bit, from the algorithm).
  version =
    case key_length
    when 40 then 1
    when 48, 56, 64, 72, 80, 88, 96, 104, 112, 120 then 2
    when 128 then (algorithm == :aes || force_v4 ? 4 : 2)
    when 256 then 5
    else
      raise(HexaPDF::UnsupportedEncryptionError,
            "Invalid key length #{key_length} specified")
    end
  dict[:V] = version
  # /Length is only written for /V 2.
  dict[:Length] = key_length if version == 2

  # The algorithm is checked only after the key length has been accepted.
  unless [:aes, :arc4].include?(algorithm)
    raise(HexaPDF::UnsupportedEncryptionError,
          "Unsupported encryption algorithm: #{algorithm}")
  end
  if algorithm == :aes && key_length < 128
    raise(HexaPDF::UnsupportedEncryptionError,
          "AES algorithm needs a key length of 128 or 256 bit")
  end
  if algorithm == :arc4 && key_length == 256
    raise(HexaPDF::UnsupportedEncryptionError,
          "ARC4 algorithm can only be used with key lengths between 40 and 128 bit")
  end

  prepared = prepare_encryption(algorithm: algorithm, **options)
  # Remember the state of the dictionary for #encryption_key_valid?.
  @encrypt_dict_hash = document.unwrap(dict).hash
  set_up_security_handler(*prepared)
  @dict
end
# Uses the given encryption dictionary to set up the security handler for decrypting the
# document.
#
# The security handler specific +options+ are passed on to the #prepare_decryption method.
#
# See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
def set_up_decryption(dictionary, **options)
  @dict = document.wrap(dictionary, type: encryption_dictionary_class)

  version = dict[:V]
  if [1, 2].include?(version)
    strf = stmf = eff = :arc4
  elsif [4, 5].include?(version)
    crypt_filters = dict[:CF]
    # Map each of /StrF, /StmF and /EFF to an algorithm name via the referenced crypt filter.
    strf, stmf, eff = [:StrF, :StmF, :EFF].map do |field|
      crypt_filter = crypt_filters && crypt_filters[dict[field]]
      # No /CF or no entry for the referenced name means that nothing is encrypted.
      next :identity unless crypt_filter

      cfm = crypt_filter[:CFM]
      case cfm
      when :V2 then :arc4
      when :AESV2, :AESV3 then :aes
      when :None then :identity
      else
        raise(HexaPDF::UnsupportedEncryptionError,
              "Unsupported encryption method: #{cfm}")
      end
    end
    # Without an explicit /EFF entry, embedded files use the stream algorithm.
    eff = stmf unless dict[:EFF]
  else
    raise HexaPDF::UnsupportedEncryptionError, "Unsupported encryption version #{version}"
  end

  set_up_security_handler(prepare_decryption(**options), strf, stmf, eff)
  # Remember the state of the dictionary for #encryption_key_valid?.
  @encrypt_dict_hash = document.unwrap(@dict).hash

  @dict
end
private
# Returns the associated PDF document.
#
# Subclasses should use this method to access the document.
def document
# Assigned once in #initialize.
@document
end
# Returns the encryption dictionary used by this security handler.
#
# Subclasses should use this dictionary to read and set values.
def dict
# Assigned by #set_up_encryption or #set_up_decryption; +nil+ before one of them has run.
@dict
end
# Returns the encryption key that is used for encryption/decryption.
#
# Only available after decryption or encryption has been set up.
def encryption_key
# Assigned by #set_up_security_handler.
@encryption_key
end
# Returns the algorithm class that is used for encrypting/decrypting strings.
#
# Only available after decryption or encryption has been set up.
def string_algorithm
# Assigned by #set_up_security_handler.
@string_algorithm
end
# Returns the algorithm class that is used for encrypting/decrypting streams.
#
# Only available after decryption or encryption has been set up.
def stream_algorithm
# Assigned by #set_up_security_handler.
@stream_algorithm
end
# Returns the algorithm class that is used for encrypting/decrypting embedded files.
#
# Only available after decryption or encryption has been set up.
def embedded_file_algorithm
# Assigned by #set_up_security_handler.
@embedded_file_algorithm
end
# Assigns all necessary attributes so that encryption/decryption works correctly.
#
# The assigned values can be retrieved via the #encryption_key, #string_algorithm,
# #stream_algorithm and #embedded_file_algorithm methods.
def set_up_security_handler(key, strf, stmf, eff)
  @encryption_key = key

  # Each algorithm name (:arc4, :aes or :identity) is resolved through the private method
  # "<name>_algorithm" of this class.
  @string_algorithm = send(:"#{strf}_algorithm")
  @stream_algorithm = send(:"#{stmf}_algorithm")
  @embedded_file_algorithm = send(:"#{eff}_algorithm")

  details = {
    version: dict[:V],
    string_algorithm: strf,
    stream_algorithm: stmf,
    embedded_file_algorithm: eff,
  }
  # #key_length returns bytes, the details contain the length in bits.
  details[:key_length] = key_length * 8
  @encryption_details = details
end
# Returns the class that is used for ARC4 encryption.
def arc4_algorithm
  # Looked up through the document's configuration on first use, memoized afterwards.
  return @arc4_algorithm if @arc4_algorithm
  @arc4_algorithm = document.config.constantize('encryption.arc4')
end
# Returns the class that is used for AES encryption.
def aes_algorithm
  # Looked up through the document's configuration on first use, memoized afterwards.
  return @aes_algorithm if @aes_algorithm
  @aes_algorithm = document.config.constantize('encryption.aes')
end
# Returns the class that is used for the identity algorithm which passes back the data as is
# without encrypting or decrypting it.
def identity_algorithm
# NOTE(review): Identity is neither defined nor required in this file; presumably it resolves to
# HexaPDF::Encryption::Identity and is loaded elsewhere - confirm.
Identity
end
# Computes the key for decrypting the indirect object with the given algorithm.
#
# See: PDF1.7 s7.6.2 (algorithm 1), PDF2.0 s7.6.2.2 (algorithm 1.A)
def object_key(oid, gen, algorithm)
  # For /V 5 the encryption key is used as is for every object.
  return encryption_key if dict[:V] == 5

  # 'VXv' appends the low-order three bytes of the object number followed by the low-order two
  # bytes of the generation number. String#+ creates a new string, so the encryption key itself
  # is never modified.
  salted_key = encryption_key + [oid, gen].pack('VXv')
  salted_key << "sAlT" if algorithm.ancestors.include?(AES)

  # Only the first (key length + 5) bytes of the digest are used, but never more than 16.
  Digest::MD5.digest(salted_key)[0, [key_length + 5, 16].min]
end
# Returns the length of the encryption key in bytes based on the security handlers version.
#
# See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
def key_length
  version = dict[:V]
  case version
  when 1
    5
  when 2
    # /Length is specified in bits.
    dict[:Length] / 8
  when 4
    16 # PDF2.0 s7.6.1 specifies that a /V of 4 is equal to length of 128bit
  when 5
    32 # PDF2.0 s7.6.1 specifies that a /V of 5 is equal to length of 256bit
  end
end
# Returns the class used as wrapper for the encryption dictionary.
def encryption_dictionary_class
# Passed as +type+ to document.wrap in #set_up_encryption and #set_up_decryption.
EncryptionDictionary
end
# Returns +n+ random bytes.
def random_bytes(n)
# Delegates to the class returned by #aes_algorithm.
aes_algorithm.random_bytes(n)
end
# Finds all strings in the given object and yields them.
#
# Note: Decryption happens directly after parsing and loading an object, before it can be
# touched by anything else. Therefore we only have to contend with the basic data structures.
def each_string_in_object(obj, &block) # :yields: str
  # Strings are yielded, hashes (values only) and arrays are traversed recursively, and
  # everything else is ignored.
  case obj
  when String then yield(obj)
  when Array then obj.each {|element| each_string_in_object(element, &block) }
  when Hash then obj.each_value {|entry| each_string_in_object(entry, &block) }
  end
end
end
end
end