# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2020 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
#++

require 'hexapdf/encryption/security_handler'
require 'digest/md5'
require 'digest/sha2'

module HexaPDF
  module Encryption

    # The specialized encryption dictionary for the StandardSecurityHandler.
    #
    # Contains additional fields that are used for storing the information needed for retrieving
    # the encryption key and a set of permissions.
    class StandardEncryptionDictionary < EncryptionDictionary

      define_field :R, type: Integer, required: true
      define_field :O, type: PDFByteString, required: true
      define_field :OE, type: PDFByteString, version: '2.0'
      define_field :U, type: PDFByteString, required: true
      define_field :UE, type: PDFByteString, version: '2.0'
      define_field :P, type: Integer, required: true
      define_field :Perms, type: PDFByteString, version: '2.0'
      define_field :EncryptMetadata, type: Boolean, default: true, version: '1.5'

      private

      # Validates the fields special for this encryption dictionary.
      #
      # Every problem is reported by yielding a message together with +false+:
      #
      # * Revisions 2, 3 and 4 need /U and /O values of exactly 32 bytes.
      # * Revision 6 needs /OE, /UE and /Perms to be present, /U and /O values of exactly 48
      #   bytes, /UE and /OE values of exactly 32 bytes and a /Perms value of exactly 16 bytes.
      # * Any other /R value is reported as invalid.
      def perform_validation
        super
        case value[:R]
        when 2, 3, 4
          if value[:U].length != 32 || value[:O].length != 32
            yield("Invalid size for /U or /O values for revisions <= 4", false)
          end
        when 6
          if !key?(:OE) || !key?(:UE) || !key?(:Perms)
            yield("Value of /OE, /UE or /Perms is missing for dictionary revision 6", false)
            # The length checks below would fail on the missing values, so stop here.
            return
          end
          if value[:U].length != 48 || value[:O].length != 48 || value[:UE].length != 32 ||
              value[:OE].length != 32 || value[:Perms].length != 16
            yield("Invalid size for /U, /O, /UE, /OE or /Perms values for revisions 6", false)
          end
        else
          yield("Value of /R is not one of 2, 3, 4 or 6", false)
        end
      end

    end

    # The password-based standard security handler of the PDF specification, identified by a
    # /Filter value of /Standard.
    #
    # == Overview
    #
    # The PDF specification defines one security handler that should be implemented by all PDF
    # conforming libraries and applications. This standard security handler allows access
    # permissions and a user password as well as an owner password to be set. See
    # StandardSecurityHandler::EncryptionOptions for all valid options that can be used with this
    # security handler.
    #
    # The access permissions (see StandardSecurityHandler::Permissions) can be used to restrict what
    # a user is allowed to do with a PDF file.
    #
    # When a user or owner password is specified, a PDF file can only be opened when the correct
    # password is supplied.
    #
    # See: PDF1.7 s7.6.3, PDF2.0 s7.6.3
    class StandardSecurityHandler < SecurityHandler

      # Defines all available permissions.
      #
      # It is possible to use an array of permission symbols instead of an integer to describe the
      # permission set. The used symbols are the lower case versions of the constants, i.e. the
      # symbol for MODIFY_CONTENT would be :modify_content.
      #
      # See: PDF1.7 s7.6.3.2
      module Permissions

        # Printing (if HIGH_QUALITY_PRINT is also set, then high quality printing is allowed)
        PRINT = 1 << 2

        # Modification of the content by operations that are different from those controlled by
        # MODIFY_ANNOTATION, FILL_IN_FORMS and ASSEMBLE_DOCUMENT
        MODIFY_CONTENT = 1 << 3

        # Copying of content
        COPY_CONTENT = 1 << 4

        # Modifying annotations
        MODIFY_ANNOTATION = 1 << 5

        # Filling in form fields
        FILL_IN_FORMS = 1 << 8

        # Extracting content
        EXTRACT_CONTENT = 1 << 9

        # Assembling of the document (inserting, rotating or deleting of pages and creation of
        # bookmarks or thumbnail images)
        ASSEMBLE_DOCUMENT = 1 << 10

        # High quality printing
        HIGH_QUALITY_PRINT = 1 << 11

        # Allows everything
        ALL = PRINT | MODIFY_CONTENT | COPY_CONTENT | MODIFY_ANNOTATION | FILL_IN_FORMS |
          EXTRACT_CONTENT | ASSEMBLE_DOCUMENT | HIGH_QUALITY_PRINT

        # Reserved permission bits
        RESERVED = 0xFFFFF000 | 0b11000000

        # Maps permission symbols to their respective value
        SYMBOL_TO_PERMISSION = {
          print: PRINT,
          modify_content: MODIFY_CONTENT,
          copy_content: COPY_CONTENT,
          modify_annotation: MODIFY_ANNOTATION,
          fill_in_forms: FILL_IN_FORMS,
          extract_content: EXTRACT_CONTENT,
          assemble_document: ASSEMBLE_DOCUMENT,
          high_quality_print: HIGH_QUALITY_PRINT,
        }.freeze

        # Maps a permission value to its symbol
        PERMISSION_TO_SYMBOL = {
          PRINT => :print,
          MODIFY_CONTENT => :modify_content,
          COPY_CONTENT => :copy_content,
          MODIFY_ANNOTATION => :modify_annotation,
          FILL_IN_FORMS => :fill_in_forms,
          EXTRACT_CONTENT => :extract_content,
          ASSEMBLE_DOCUMENT => :assemble_document,
          HIGH_QUALITY_PRINT => :high_quality_print,
        }.freeze

      end

      # Defines all possible options that can be passed to a StandardSecurityHandler when setting
      # up encryption.
      class EncryptionOptions

        # The user password. If this attribute is not specified but the virtual +password+
        # attribute is, then the latter is used.
        attr_accessor :user_password

        # The owner password. If this attribute is not specified but the virtual +password+
        # attribute is, then the latter is used.
        attr_accessor :owner_password

        # The permissions. Either an integer with the needed permission bits set or an array of
        # permission symbols.
        #
        # See: Permissions
        attr_accessor :permissions

        # The encryption algorithm.
        attr_accessor :algorithm

        # Specifies whether metadata should be encrypted.
        attr_accessor :encrypt_metadata

        # :nodoc:
        def initialize(data = {})
          # Recognized keys are consumed with #delete so that leftovers can be reported below.
          # Work on a copy so that the hash of the caller is left untouched.
          data = data.dup
          fallback_pwd = data.delete(:password) { '' }
          @user_password = data.delete(:user_password) { fallback_pwd }
          @owner_password = data.delete(:owner_password) { fallback_pwd }
          # An empty owner password falls back to the user password.
          @owner_password = @user_password if @owner_password.to_s.empty?
          @permissions = process_permissions(data.delete(:permissions) { Permissions::ALL })
          @algorithm = data.delete(:algorithm) { :arc4 }
          @encrypt_metadata = data.delete(:encrypt_metadata) { true }
          unless data.empty?
            raise ArgumentError, "Invalid encryption options: #{data.keys.join(', ')}"
          end
        end

        private

        # Maps the permissions to an integer for use by the standard security handler.
        #
        # An array of permission symbols is combined into a bit set first; symbols that are not
        # part of Permissions::SYMBOL_TO_PERMISSION contribute no bits. Afterwards the reserved
        # bits are set, the two lowest bits are cleared and the value is shifted into the
        # negative range by subtracting 2**32.
        #
        # See: PDF1.7 s7.6.3.2, ADB1.7 3.5.2 (table 3.20 and the paragraphs before)
        def process_permissions(perms)
          if perms.kind_of?(Array)
            perms = perms.inject(0) do |result, perm|
              result | Permissions::SYMBOL_TO_PERMISSION.fetch(perm, 0)
            end
          end
          ((Permissions::RESERVED | perms) & 0xFFFFFFFC) - 2**32
        end

      end

      # Additionally checks that the document trailer's ID has not changed.
      #
      # The ID check is skipped for revisions greater than 4.
      #
      # See: SecurityHandler#encryption_key_valid?
      def encryption_key_valid?
        super && (document.trailer[:Encrypt][:R] > 4 || trailer_id_hash == @trailer_id_hash)
      end

      # Returns the permissions of the managed dictionary as array of symbol values.
      #
      # See: Permissions
      def permissions
        Permissions::PERMISSION_TO_SYMBOL.each_with_object([]) do |(perm, sym), result|
          result << sym if dict[:P] & perm == perm
        end
      end

      private

      # Prepares the security handler for use in encrypting the document.
      #
      # See the attributes of the EncryptionOptions class for all possible arguments.
      #
      # Returns an array consisting of the file encryption key followed by the selected algorithm
      # three times (how the three entries are used is defined by the caller in SecurityHandler).
      def prepare_encryption(**kwoptions)
        options = EncryptionOptions.new(kwoptions)

        dict[:Filter] = :Standard
        dict[:R] = case dict[:V]
                   when 1 then 2
                   when 2 then 3
                   when 4 then 4
                   when 5 then 6
                   end
        dict[:EncryptMetadata] = options.encrypt_metadata
        dict[:P] = options.permissions

        if dict[:V] >= 4
          cfm = if options.algorithm == :arc4
                  :V2
                elsif key_length == 16
                  :AESV2
                else
                  :AESV3
                end
          dict[:CF] = {
            StdCF: {
              CFM: cfm,
              AuthEvent: :DocOpen,
              Length: key_length,
            },
          }
          dict[:StmF] = dict[:StrF] = :StdCF
        end

        # Revisions <= 4 mix the first document ID string into the key derivation.
        if dict[:R] <= 4 && !document.trailer[:ID].kind_of?(PDFArray)
          document.trailer.set_random_id
        end

        options.user_password = prepare_password(options.user_password)
        options.owner_password = prepare_password(options.owner_password)

        # The order in which /O and /U are computed depends on the revision:
        #
        # * Revisions <= 4: /U is derived from the user encryption key which reads /O, so /O has
        #   to be set first.
        # * Revision 6: The /O hash includes the complete /U value, so /U has to be set first.
        if dict[:R] <= 4
          dict[:O] = compute_o_field(options.owner_password, options.user_password)
          dict[:U] = compute_u_field(options.user_password)
          encryption_key = compute_user_encryption_key(options.user_password)
        else
          dict[:U] = compute_u_field(options.user_password)
          dict[:O] = compute_o_field(options.owner_password, options.user_password)
          encryption_key = random_bytes(32)
          dict[:UE] = compute_ue_field(options.user_password, encryption_key)
          dict[:OE] = compute_oe_field(options.owner_password, encryption_key)
          dict[:Perms] = compute_perms_field(encryption_key)
        end

        @trailer_id_hash = trailer_id_hash

        [encryption_key, options.algorithm, options.algorithm, options.algorithm]
      end

      # Uses the given password (or the default password if none given) to retrieve the encryption
      # key.
      #
      # The empty user password is tried first, then the given password as user password and
      # finally the given password as owner password.
      #
      # If the optional +check_permissions+ argument is +true+, the permissions for files
      # encrypted with revision 6 are checked. Otherwise, permission changes are ignored.
      #
      # Raises HexaPDF::UnsupportedEncryptionError for an invalid /Filter or /R value and
      # HexaPDF::EncryptionError if the document ID is missing for revisions <= 4 or if the
      # password is invalid.
      def prepare_decryption(password: '', check_permissions: true)
        if dict[:Filter] != :Standard
          raise(HexaPDF::UnsupportedEncryptionError,
                "Invalid /Filter value for standard security handler")
        elsif ![2, 3, 4, 6].include?(dict[:R])
          raise(HexaPDF::UnsupportedEncryptionError,
                "Invalid /R value for standard security handler")
        elsif dict[:R] <= 4 && !document.trailer[:ID].kind_of?(PDFArray)
          raise(HexaPDF::EncryptionError,
                "Document ID is needed for decryption")
        end
        @trailer_id_hash = trailer_id_hash

        password = prepare_password(password)

        if user_password_valid?(prepare_password(''))
          encryption_key = compute_user_encryption_key(prepare_password(''))
        elsif user_password_valid?(password)
          encryption_key = compute_user_encryption_key(password)
        elsif owner_password_valid?(password)
          encryption_key = compute_owner_encryption_key(password)
        else
          raise HexaPDF::EncryptionError, "Invalid password specified"
        end

        check_perms_field(encryption_key) if check_permissions && dict[:R] == 6

        encryption_key
      end

      # Computes the hash value for the first string in the trailer ID array.
      def trailer_id_hash # :nodoc:
        id = document.unwrap(document.trailer[:ID])
        (id.kind_of?(Array) ? id[0] : id).hash
      end

      # See SecurityHandler#encryption_dictionary_class
      def encryption_dictionary_class
        StandardEncryptionDictionary
      end

      # The padding used for passwords with fewer than 32 bytes. Only used for revisions <= 4.
      #
      # See: PDF1.7 s7.6.3.3
      PASSWORD_PADDING = "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08" \
        "\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A".b

      # Computes the user encryption key.
      #
      # For revisions <= 4 this is the *only* way for generating the encryption key needed to
      # encrypt or decrypt a file. The /O value has to be set beforehand since it is part of the
      # hashed data.
      #
      # For revision 6 the file encryption key is a string of random bytes that has been encrypted
      # with the user password. If the password is the owner password,
      # #compute_owner_encryption_key has to be used instead.
      #
      # See: PDF1.7 s7.6.3.3 (algorithm 2), PDF2.0 s7.6.3.3.2 (algorithm 2.A (a)-(b),(e))
      def compute_user_encryption_key(password)
        if dict[:R] <= 4
          data = password
          data += dict[:O] # += creates a new string, the password argument is not modified
          data << [dict[:P]].pack('V')
          data << document.trailer[:ID][0]
          data << [0xFFFFFFFF].pack('V') if dict[:R] == 4 && !dict[:EncryptMetadata]

          n = key_length
          data = Digest::MD5.digest(data)
          if dict[:R] >= 3
            50.times { data = Digest::MD5.digest(data[0, n]) }
          end

          data[0, n]
        elsif dict[:R] == 6
          # Bytes 40..47 of /U are the key salt.
          key = compute_hash(password, dict[:U][40, 8])
          aes_algorithm.new(key, "\0" * 16, :decrypt).process(dict[:UE])
        end
      end

      # Computes the owner encryption key.
      #
      # For revisions <= 4 this is done by first retrieving the user password through the use of
      # the owner password and then using the #compute_user_encryption_key method.
      #
      # For revision 6 the file encryption key is a string of random bytes that has been encrypted
      # with the owner password. If the password is the user password,
      # #compute_user_encryption_key has to be used.
      #
      # See: PDF2.0 s7.6.3.3.2 (algorithm 2.A (a)-(d))
      def compute_owner_encryption_key(password)
        if dict[:R] <= 4
          compute_user_encryption_key(user_password_from_owner_password(password))
        elsif dict[:R] == 6
          # Bytes 40..47 of /O are the key salt.
          key = compute_hash(password, dict[:O][40, 8], dict[:U])
          aes_algorithm.new(key, "\0" * 16, :decrypt).process(dict[:OE])
        end
      end

      # Computes the encryption dictionary's /O (owner password) value.
      #
      # Short explanation: For revisions <= 4 the user password is encrypted with a key based on
      # the owner password. For revision 6 the /O value is a hash computed from the password and
      # the /U value with added validation and key salts.
      #
      # *Attention*: If revision 6 is used, the /U value has to be computed and set before this
      # method is used, otherwise the return value is incorrect!
      #
      # See: PDF1.7 s7.6.3.4 (algorithm 3), PDF2.0 s7.6.3.4.7 (algorithm 9 (a))
      def compute_o_field(owner_password, user_password)
        if dict[:R] <= 4
          data = Digest::MD5.digest(owner_password)
          if dict[:R] >= 3
            50.times { data = Digest::MD5.digest(data) }
          end
          key = data[0, key_length]

          data = arc4_algorithm.encrypt(key, user_password)
          if dict[:R] >= 3
            19.times {|i| data = arc4_algorithm.encrypt(xor_key(key, i + 1), data) }
          end

          data
        elsif dict[:R] == 6
          validation_salt = random_bytes(8)
          key_salt = random_bytes(8)
          compute_hash(owner_password, validation_salt, dict[:U]) << validation_salt << key_salt
        end
      end

      # Computes the encryption dictionary's /OE (owner encryption key) value (for revision 6
      # only).
      #
      # Short explanation: Encrypts the file encryption key with a key based on the password and
      # the /O and /U values.
      #
      # See: PDF2.0 s7.6.3.4.7 (algorithm 9 (b))
      def compute_oe_field(password, file_encryption_key)
        key = compute_hash(password, dict[:O][40, 8], dict[:U])
        aes_algorithm.new(key, "\0" * 16, :encrypt).process(file_encryption_key)
      end

      # Computes the encryption dictionary's /U (user password) value.
      #
      # Short explanation: For revisions <= 4, the password padding string is encrypted with a key
      # based on the user password. For revision 6 the /U value is a hash computed from the
      # password with added validation and key salts.
      #
      # *Attention*: For revisions <= 4 the /O value has to be computed and set before this method
      # is used because the user encryption key depends on it.
      #
      # See: PDF1.7 s7.6.3.4 (algorithm 4 for R=2, algorithm 5 for R=3 and R=4)
      #      PDF2.0 s7.6.3.4.6 (algorithm 8 (a) for R=6)
      def compute_u_field(password)
        if dict[:R] == 2
          key = compute_user_encryption_key(password)
          arc4_algorithm.encrypt(key, PASSWORD_PADDING)
        elsif dict[:R] <= 4
          key = compute_user_encryption_key(password)
          data = Digest::MD5.digest(PASSWORD_PADDING + document.trailer[:ID][0])
          data = arc4_algorithm.encrypt(key, data)
          19.times {|i| data = arc4_algorithm.encrypt(xor_key(key, i + 1), data) }
          # Only the first 16 bytes are compared in #user_password_valid?, the rest is filler.
          data << "hexapdfhexapdfhe"
        elsif dict[:R] == 6
          validation_salt = random_bytes(8)
          key_salt = random_bytes(8)
          compute_hash(password, validation_salt) << validation_salt << key_salt
        end
      end

      # Computes the encryption dictionary's /UE (user encryption key) value (for revision 6
      # only).
      #
      # Short explanation: Encrypts the file encryption key with a key based on the password and
      # the /U value.
      #
      # See: PDF2.0 s7.6.3.4.6 (algorithm 8 (b))
      def compute_ue_field(password, file_encryption_key)
        key = compute_hash(password, dict[:U][40, 8])
        aes_algorithm.new(key, "\0" * 16, :encrypt).process(file_encryption_key)
      end

      # Computes the encryption dictionary's /Perms (permissions) value (for revision 6 only).
      #
      # Uses /P and /EncryptMetadata values, so these have to be set beforehand.
      #
      # See: PDF2.0 s7.6.3.4.8 (algorithm 10)
      def compute_perms_field(file_encryption_key)
        data = [dict[:P]].pack('V')
        data << [0xFFFFFFFF].pack('V')
        data << (dict[:EncryptMetadata] ? 'T' : 'F')
        data << 'adb'
        data << 'hexa' # filler bytes, not inspected by #check_perms_field
        aes_algorithm.new(file_encryption_key, "\0" * 16, :encrypt).process(data)
      end

      # Authenticates the user password, i.e. decides whether the given user password is valid.
      #
      # See: PDF1.7 s7.6.3.4 (algorithm 6), PDF2.0 s7.6.3.4.9 (algorithm 11)
      def user_password_valid?(password)
        if dict[:R] == 2
          compute_u_field(password) == dict[:U]
        elsif dict[:R] <= 4
          compute_u_field(password)[0, 16] == dict[:U][0, 16]
        elsif dict[:R] == 6
          # Bytes 32..39 of /U are the validation salt, bytes 0..31 the hash.
          compute_hash(password, dict[:U][32, 8]) == dict[:U][0, 32]
        end
      end

      # Authenticates the owner password, i.e. decides whether the given owner password is valid.
      #
      # See: PDF1.7 s7.6.3.4 (algorithm 7), PDF2.0 s7.6.3.4.10 (algorithm 12)
      def owner_password_valid?(password)
        if dict[:R] <= 4
          user_password_valid?(user_password_from_owner_password(password))
        elsif dict[:R] == 6
          # Bytes 32..39 of /O are the validation salt, bytes 0..31 the hash.
          compute_hash(password, dict[:O][32, 8], dict[:U]) == dict[:O][0, 32]
        end
      end

      # Checks if the decrypted /Perms entry matches the /P and /EncryptMetadata entries.
      #
      # This method can only be used for revision 6.
      #
      # Raises HexaPDF::EncryptionError if the decrypted value doesn't contain the "adb" marker or
      # doesn't match the /P or /EncryptMetadata values.
      #
      # See: PDF2.0 s7.6.3.4.11 (algorithm 13)
      def check_perms_field(encryption_key)
        decrypted = aes_algorithm.new(encryption_key, "\0" * 16, :decrypt).process(dict[:Perms])
        if decrypted[9, 3] != "adb"
          raise HexaPDF::EncryptionError, "/Perms field cannot be decrypted"
        elsif (dict[:P] & 0xFFFFFFFF) != (decrypted[0, 4].unpack1('V') & 0xFFFFFFFF)
          raise HexaPDF::EncryptionError, "Decrypted permissions don't match /P"
        elsif decrypted[8] != (dict[:EncryptMetadata] ? 'T' : 'F')
          raise HexaPDF::EncryptionError, "Decrypted /Perms field doesn't match /EncryptMetadata"
        end
      end

      # Returns the user password when given the owner password for revisions <= 4.
      #
      # See: PDF1.7 s7.6.3.4 (algorithm 7 (a) and (b))
      def user_password_from_owner_password(owner_password)
        data = Digest::MD5.digest(owner_password)
        if dict[:R] >= 3
          50.times { data = Digest::MD5.digest(data) }
        end
        key = data[0, key_length]

        if dict[:R] == 2
          userpwd = arc4_algorithm.decrypt(key, dict[:O])
        else
          userpwd = dict[:O]
          # Undo the 20 encryption rounds of #compute_o_field in reverse order (19 down to 0).
          20.times {|i| userpwd = arc4_algorithm.decrypt(xor_key(key, 19 - i), userpwd) }
        end

        userpwd
      end

      # Computes a hash that is used extensively for all operations in security handlers of
      # revision 6.
      #
      # Note: The original input (as defined by the spec) is calculated as
      # "#{password}#{salt}#{user_key}" where +user_key+ has to be empty when doing operations
      # with the user password.
      #
      # Returns the first 32 bytes of the final hash value.
      #
      # See: PDF2.0 s7.6.3.3.3 (algorithm 2.B)
      def compute_hash(password, salt, user_key = '')
        k = Digest::SHA256.digest("#{password}#{salt}#{user_key}")
        e = ''
        i = 0
        # At least 64 rounds are done; afterwards the last byte of +e+ decides whether to go on.
        while i < 64 || e.getbyte(-1) > i - 32
          k1 = "#{password}#{k}#{user_key}" * 64
          e = aes_algorithm.new(k[0, 16], k[16, 16], :encrypt).process(k1)
          k = case e.unpack('C16').inject(&:+) % 3 # 256 % 3 == 1 % 3 --> x*256 % 3 == x % 3
              when 0 then Digest::SHA256.digest(e)
              when 1 then Digest::SHA384.digest(e)
              when 2 then Digest::SHA512.digest(e)
              end
          i += 1
        end
        k[0, 32]
      end

      # Returns the password modified so that it follows certain rules:
      #
      # * For revisions <= 4, the password is truncated to a maximum of 32 characters, converted
      #   into ISO-8859-1 encoding and padded to 32 bytes with PASSWORD_PADDING.
      #
      # * For revision 6 the password is converted into UTF-8 encoding and truncated to a maximum
      #   of 127 bytes. No further normalization is done by this method.
      #
      # Raises HexaPDF::EncryptionError if the password contains a character that cannot be
      # converted to the target encoding.
      #
      # See: PDF1.7 s7.6.3.3 (algorithm 2 step a)),
      #      PDF2.0 s7.6.3.3.2 (algorithm 2.A steps a) and b))
      def prepare_password(password)
        if dict[:R] <= 4
          password.to_s[0, 32].encode(Encoding::ISO_8859_1).force_encoding(Encoding::BINARY).
            ljust(32, PASSWORD_PADDING)
        elsif dict[:R] == 6
          password.to_s.encode(Encoding::UTF_8).force_encoding(Encoding::BINARY)[0, 127]
        end
      rescue Encoding::UndefinedConversionError => e
        raise HexaPDF::EncryptionError, "Invalid character in password: #{e.error_char}"
      end

      # XORs each byte of the String +key+ with value and returns the resulting string.
      #
      # The +key+ argument itself is not modified, a copy is used.
      def xor_key(key, value)
        new_key = key.dup
        i = 0
        while i < new_key.length
          new_key.setbyte(i, (new_key.getbyte(i) ^ value) % 256)
          i += 1
        end
        new_key
      end

    end

  end
end