# -*- encoding: utf-8; frozen_string_literal: true -*- # #-- # This file is part of HexaPDF. # # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby # Copyright (C) 2014-2023 Thomas Leitner # # HexaPDF is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License version 3 as # published by the Free Software Foundation with the addition of the # following permission added to Section 15 as permitted in Section 7(a): # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON # INFRINGEMENT OF THIRD PARTY RIGHTS. # # HexaPDF is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public # License for more details. # # You should have received a copy of the GNU Affero General Public License # along with HexaPDF. If not, see . # # The interactive user interfaces in modified source and object code # versions of HexaPDF must display Appropriate Legal Notices, as required # under Section 5 of the GNU Affero General Public License version 3. # # In accordance with Section 7(b) of the GNU Affero General Public # License, a covered work must retain the producer line in every PDF that # is created or manipulated using HexaPDF. # # If the GNU Affero General Public License doesn't fit your need, # commercial licenses are available at . #++ require 'hexapdf/error' require 'hexapdf/reference' module HexaPDF # Internal value object for storing object number, generation number, object value and a # possible stream together. Such objects are not used directly but wrapped by Object or one of # its subclasses. class PDFData #:nodoc: attr_reader :oid, :gen #:nodoc: attr_accessor :stream, :value def initialize(value, oid = nil, gen = nil, stream = nil) #:nodoc: self.value = value self.oid = oid self.gen = gen self.stream = stream end def oid=(oid) #:nodoc: @oid = Integer(oid || 0) end def gen=(gen) #:nodoc @gen = Integer(gen || 0) end end # Objects of the PDF object system. # # == Overview # # A PDF object is like a normal object but with an additional *object identifier* consisting of # an object number and a generation number. If the object number is zero, then the PDF object # represents a direct object. Otherwise the object identifier uniquely identifies this object as # an indirect object and can be used for referencing it (from possibly multiple places). # # Furthermore a PDF object may have an associated stream. However, this stream is only # accessible if the subclass Stream is used. # # A PDF object *should* be connected to a PDF document, otherwise some methods may not work. # # Most PDF objects in a PDF document are represented by subclasses of this class that provide # additional functionality. # # The methods #hash and #eql? are implemented so that objects of this class can be used as hash # keys. Furthermore the implementation is compatible to the one of Reference, i.e. the hash of a # PDF Object is the same as the hash of its corresponding Reference object. # # == Allowed PDF Object Values # # The PDF specification knows of the following object types: # # * Boolean (mapped to +true+ and +false+), # * Integer (mapped to Integer object) # * Real (mapped to Float objects) # * String (mapped to String objects with UTF-8 or binary encoding) # * Names (mapped to Symbol objects) # * Array (mapped to Array objects) # * Dictionary (mapped to Hash objects) # * Stream (mapped to the Stream class which is a Dictionary with the associated stream data) # * Null (mapped to +nil+) # * Indirect Object (mapped to this class) # # So working with PDF objects in HexaPDF is rather straightforward since the common Ruby objects # can be used for most things, i.e. wrapping an plain Ruby object into an object of this class is # not necessary (except if it should become an indirect object). # # There are also some additional data structures built from these primitive ones. For example, # Time objects are represented as specially formatted string objects and conversion from and to # the string representation is handled automatically. # # *Important*: Users of HexaPDF may use other plain Ruby objects but then there is no guarantee # that everything will work correctly, especially when using other collection types than arrays # and hashes. # # See: HexaPDF::Dictionary, HexaPDF::Stream, HexaPDF::Reference, HexaPDF::Document # # See: PDF1.7 s7.3.10, s7.3.8 class Object include Comparable # :call-seq: # HexaPDF::Object.deep_copy(object) -> copy # # Creates a deep copy of the given object which retains the references to indirect objects. def self.deep_copy(object) case object when Hash object.transform_values {|value| deep_copy(value) } when Array object.map {|o| deep_copy(o) } when HexaPDF::Object (object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value)) when HexaPDF::Reference object else object.dup end end # Makes sure that the object itself as well as all nested values are direct objects. # # If an indirect object is found, it is turned into a direct object and the indirect object is # deleted from the document. def self.make_direct(object) if object.kind_of?(HexaPDF::Object) && object.indirect? object_to_delete = object object = object.value object_to_delete.document.delete(object_to_delete) end if object.kind_of?(Hash) object.transform_values! {|val| make_direct(val) } elsif object.kind_of?(Array) object.map! {|val| make_direct(val) } end object end # Returns +nil+ to end the recursion for field searching in Dictionary.field. def self.field(_name) nil end # The wrapped HexaPDF::PDFData value. # # This attribute is not part of the public API! attr_reader :data # Sets the associated PDF document. attr_writer :document # Sets whether the object has to be an indirect object once it is written. attr_writer :must_be_indirect # Creates a new PDF object wrapping the value. # # The +value+ can either be a PDFData object in which case it is used directly. If it is a PDF # Object, then its data is used. Otherwise the +value+ object is used as is. In all cases, the # oid, gen and stream values may be overridden by the corresponding keyword arguments. def initialize(value, document: nil, oid: nil, gen: nil, stream: nil) @data = case value when PDFData then value when Object then value.data else PDFData.new(value) end @data.oid = oid if oid @data.gen = gen if gen @data.stream = stream if stream self.document = document self.must_be_indirect = false after_data_change end # Returns the object number of the PDF object. def oid data.oid end # Sets the object number of the PDF object. def oid=(oid) data.oid = oid end # Returns the generation number of the PDF object. def gen data.gen end # Sets the generation number of the PDF object. def gen=(gen) data.gen = gen end # Returns the object value. def value data.value end # Sets the object value. Unlike in #initialize the value is used as is! def value=(val) data.value = val after_data_change end # Returns the associated PDF document. # # If no document is associated, an error is raised. def document @document || raise(HexaPDF::Error, "No document associated with this object (#{inspect})") end # Returns +true+ if a PDF document is associated. def document? !@document.nil? end # Returns +true+ if the object is an indirect object (i.e. has an object number unequal to # zero). def indirect? oid != 0 end # Returns +true+ if the object must be an indirect object once it is written. def must_be_indirect? @must_be_indirect end # Returns the type (symbol) of the object. # # Since the type system is implemented in such a way as to allow exchanging implementations of # specific types, the class of an object can't be reliably used for determining the actual # type. # # However, the Type and Subtype fields can easily be used for this. Subclasses for PDF objects # that don't have such fields may use a unique name that has to begin with XX (see PDF1.7 sE.2) # and therefore doesn't clash with names defined by the PDF specification. # # For basic objects this always returns +:Unknown+. def type :Unknown end # Returns +true+ if the object represents the PDF null object. def null? value.nil? end # :call-seq: # obj.validate(auto_correct: true) -> true or false # obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false # # Validates the object, optionally corrects problems when the option +auto_correct+ is set and # returns +true+ if the object is deemed valid and +false+ otherwise. # # If a block is given, it is called on validation problems with a problem description and # whether the problem is automatically correctable. The third argument to the block is usually # this object but may be another object if during auto-correction a new object was created and # validated. # # The validation routine itself has to be implemented in the #perform_validation method - see # its documentation for more information. # # *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't # currently implement the full PDF spec. However, if the return value is +false+, there is # certainly a problem! def validate(auto_correct: true) result = true perform_validation do |msg, correctable, object| yield(msg, correctable, object || self) if block_given? result = false unless correctable return false unless auto_correct end result end # Makes a deep copy of the source PDF object and resets the object identifier. def deep_copy obj = dup obj.instance_variable_set(:@data, @data.dup) obj.data.oid = 0 obj.data.gen = 0 obj.data.stream = @data.stream.dup if @data.stream.kind_of?(String) obj.data.value = self.class.deep_copy(@data.value) obj end # Caches and returns the given +value+ or the value of the block under the given cache key. If # there is already a cached value for the key and +update+ is +false+, it is just returned. # # Set +update+ to +true+ to force an update of the cached value. # # This uses Document#cache internally. def cache(key, value = Document::UNSET, update: false, &block) document.cache(@data, key, value, update: update, &block) end # Returns +true+ if there is a cached value for the given key. # # This uses Document#cached? internally. def cached?(key) document.cached?(@data, key) end # Clears the cache for this object. def clear_cache document.clear_cache(@data) end # Compares this object to another object. # # If the other object does not respond to +oid+ or +gen+, +nil+ is returned. Otherwise objects # are ordered first by object number and then by generation number. def <=>(other) return nil unless other.respond_to?(:oid) && other.respond_to?(:gen) (oid == other.oid ? gen <=> other.gen : oid <=> other.oid) end # Returns +true+ in the following cases: # # * The other object is an Object and wraps the same #data structure. # * The other object is a Reference with the same oid/gen. # * This object is not indirect and the other object is not an Object and equal to the value of # this object. def ==(other) (other.kind_of?(Object) && data == other.data) || (other.kind_of?(Reference) && other == self) || (!indirect? && !other.kind_of?(Object) && other == data.value) end # Returns +true+ if the other object references the same PDF object as this object. def eql?(other) other.respond_to?(:oid) && oid == other.oid && other.respond_to?(:gen) && gen == other.gen end # Computes the hash value based on the object and generation numbers. def hash oid.hash ^ gen.hash end def inspect #:nodoc: "#<#{self.class.name} [#{oid}, #{gen}] value=#{value.inspect}>" end private # This method is called whenever the value or the stream of the wrapped PDFData structure is # changed. # # A subclass implementing this method has to call +super+! Otherwise things might not work # properly. def after_data_change end # Returns the configuration object of the PDF document. def config document.config end # Validates the basic object properties. # # == Implementation Hint for Subclasses # # A subclass needs to call the super method so that the validation routines of the superclasses # are also performed! # # When the validation routine finds that the object is invalid, it has to yield a problem # description and whether the problem can be corrected. An optional third argument may contain # the object that gets validated if it is different from this object (may happen when # auto-correction is used). # # After yielding, the problem has to be corrected if it is correctable. If it is not correctable # and not correcting would lead to exceptions the method has to return early. # # Here is a sample validation routine for a dictionary object type: # # def perform_validation # super # # if value[:SomeKey].length != 7 # yield("Length of /SomeKey is invalid") # # No need to return early here because following check doesn't rely on /SomeKey # end # # if value[:OtherKey] % 2 == 0 # yield("/OtherKey needs to contain an odd number of elements") # end # end def perform_validation(&block) # Validate that the object is indirect if #must_be_indirect? is +true+. if must_be_indirect? && !indirect? yield("Object must be an indirect object", true) document.add(self) end validate_nested(value, &block) end # Validates all nested values of the object, i.e. values inside collection objects. def validate_nested(obj, &block) if obj.kind_of?(HexaPDF::Object) && !obj.indirect? obj.validate(&block) elsif obj.kind_of?(Hash) obj.each_value {|val| validate_nested(val, &block) } elsif obj.kind_of?(Array) obj.each {|val| validate_nested(val, &block) } end end end end