document.rb in hexapdf-0.33.0

- old
+ new

@@ -62,36 +62,47 @@
 
 # == HexaPDF API Documentation
 #
 # Here are some pointers to more in depth information:
 #
-# * For information about the command line application, see the HexaPDF::CLI module.
+# * HexaPDF::CLI has information about the accompanying command line application.
 # * HexaPDF::Document provides information about how to work with a PDF file.
+# * HexaPDF::Composer is the main class for easily creating PDF documents from scratch.
 # * HexaPDF::Content::Canvas provides the canvas API for drawing/writing on a page or form XObject
+# * HexaPDF::Type::AcroForm::Form is the entry point for working with interactive forms.
+# * HexaPDF::Type::Outline has information on working with outlines/bookmarks.
+# * HexaPDF::Encryption provides information on how encryption works.
+# * HexaPDF::DigitalSignature is the entry point for working with digital signaturs.
 module HexaPDF
 
   autoload(:Composer, 'hexapdf/composer')
 
   # == HexaPDF::Document
   #
-  # Represents one PDF document.
+  # Represents a PDF document.
   #
-  # A PDF document consists of (indirect) objects, so the main job of this class is to provide
-  # methods for working with these objects. However, since a PDF document may also be
+  # A PDF document essentially consists of (indirect) objects, so the main job of this class is to
+  # provide methods for working with these objects. However, since a PDF document may also be
   # incrementally updated and can therefore contain one or more revisions, there are also methods
-  # for working with these revisions.
+  # for working with these revisions (see Revisions for details).
   #
-  # Note: This class provides everything to work on PDF documents on a low-level basis. This means
-  # that there are no convenience methods for higher PDF functionality. Those can be found in the
-  # objects linked from here, like #catalog.
+  # Additionally, there are many convenience methods for easily accessing the most important PDF
+  # functionality, like encrypting a document (#encrypt), working with digital signatures
+  # (#signatures), accessing the interactive form data (#acro_form), working with the pages
+  # (#pages), fonts (#fonts) and images (#images).
   #
-  # == Known Messages
+  # Note: This class provides the basis for working with a PDF document. The higher PDF
+  # functionality is *not* implemented here but either in the appropriate PDF type classes or in
+  # special convenience classes. All this functionality can be accessed via the convenience methods
+  # described above.
   #
+  # == Available Message Hooks
+  #
   # The document object provides a basic message dispatch system via #register_listener and
   # #dispatch_message.
   #
-  # Following are the messages that are used by HexaPDF itself:
+  # Following messages are used by HexaPDF itself:
   #
   # :complete_objects::
   #      This message is called before the first step of writing a document. Listeners should
   #      complete PDF objects that are missing some information.
   #
@@ -136,21 +147,26 @@
       else
         new(**kwargs, io: StringIO.new(File.binread(filename)))
       end
     end
 
-    # The configuration for the document.
+    # The configuration object for the document.
+    #
+    # See Configuration for details.
     attr_reader :config
 
     # The revisions of the document.
+    #
+    # See Revisions.
     attr_reader :revisions
 
     # Creates a new PDF document, either an empty one or one read from the provided +io+.
     #
     # When an IO object is provided and it contains an encrypted PDF file, it is automatically
     # decrypted behind the scenes. The +decryption_opts+ argument has to be set appropriately in
-    # this case.
+    # this case. In case this is not wanted, the configuration option 'document.auto_decrypt' needs
+    # to be used.
     #
     # Options:
     #
     # io::
     #     If an IO object is provided, then this document can read PDF objects from this IO object,
@@ -181,12 +197,12 @@
 
     # :call-seq:
     #   doc.object(ref)    -> obj or nil
     #   doc.object(oid)    -> obj or nil
     #
-    # Returns the current version of the indirect object for the given exact reference or for the
-    # given object number.
+    # Returns the current version of the indirect object for the given exact reference (see
+    # Reference) or for the given object number.
     #
     # For references to unknown objects, +nil+ is returned but free objects are represented by a
     # PDF Null object, not by +nil+!
     #
     # See: Revisions#object
@@ -197,11 +213,11 @@
     # :call-seq:
     #   doc.object?(ref)    -> true or false
     #   doc.object?(oid)    -> true or false
     #
     # Returns +true+ if the the document contains an indirect object for the given exact reference
-    # or for the given object number.
+    # (see Reference) or for the given object number.
     #
     # Even though this method might return +true+ for some references, #object may return +nil+
     # because this method takes *all* revisions into account. Also see the discussion on #each for
     # more information.
     #
@@ -210,11 +226,11 @@
       @revisions.object?(ref)
     end
 
     # Dereferences the given object.
     #
-    # Return the object itself if it is not a reference, or the indirect object specified by the
+    # Returns the object itself if it is not a reference, or the indirect object specified by the
     # reference.
     def deref(obj)
       obj.kind_of?(Reference) ? object(obj) : obj
     end
 
@@ -225,11 +241,11 @@
     #
     # The object can either be a native Ruby object (Hash, Array, Integer, ...) or a
     # HexaPDF::Object. If it is not the latter, #wrap is called with the object and the
     # additional keyword arguments.
     #
-    # See: Revisions#add_object
+    # See: #wrap, Revisions#add_object
     def add(obj, **wrap_opts)
       obj = wrap(obj, **wrap_opts) unless obj.kind_of?(HexaPDF::Object)
 
       if obj.document? && obj.document != self
         raise HexaPDF::Error, "Can't add object that is already attached to another document"
@@ -264,18 +280,18 @@
     def import(obj)
       source = (obj.kind_of?(HexaPDF::Object) ? obj.document : nil)
       HexaPDF::Importer.for(self).import(obj, source: source)
     end
 
-    # Wraps the given object inside a HexaPDF::Object class which allows one to use
+    # Wraps the given object inside a HexaPDF::Object (sub)class which allows one to use
     # convenience functions to work with the object.
     #
     # The +obj+ argument can also be a HexaPDF::Object object so that it can be re-wrapped if
-    # needed.
+    # necessary.
     #
     # The class of the returned object is always a subclass of HexaPDF::Object (or of
-    # HexaPDF::Stream if a +stream+ is given). Which subclass is used, depends on the values of the
+    # HexaPDF::Stream if +stream+ is given). Which subclass is used, depends on the values of the
     # +type+ and +subtype+ options as well as on the 'object.type_map' and 'object.subtype_map'
     # global configuration options:
     #
     # * First +type+ is used to try to determine the class. If it is not provided and if +obj+ is a
     #   hash with a :Type field, the value of this field is used instead. If the resulting object is
@@ -289,17 +305,17 @@
     #   class need to have values; otherwise the subtype class is not used. This is done to better
     #   prevent invalid mappings when only partial knowledge (:Type key is missing) is available.
     #
     # * If there is no valid class after the above steps, HexaPDF::Stream is used if a stream is
     #   given, HexaPDF::Dictionary if the given object is a hash, HexaPDF::PDFArray if it is an
-    #   array or else HexaPDF::Object is used.
+    #   array or else HexaPDF::Object.
     #
     # Options:
     #
     # :type:: (Symbol or Class) The type of a PDF object that should be used for wrapping. This
     #         could be, for example, :Pages. If a class object is provided, it is used directly
-    #         instead of the type detection system.
+    #         instead of determining the class through the type detection system.
     #
     # :subtype:: (Symbol) The subtype of a PDF object which further qualifies a type. For
     #            example, image objects in PDF have a type of :XObject and a subtype of :Image.
     #
     # :oid:: (Integer) The object number that should be set on the wrapped object. Defaults to 0
@@ -339,11 +355,13 @@
         subtype ||= deref(data.value[:Subtype]) || deref(data.value[:S])
       end
       if subtype
         sub_klass = GlobalConfiguration.constantize('object.subtype_map', type, subtype) { klass }
         if type ||
-            sub_klass&.each_field&.none? {|name, field| field.required? && !data.value.key?(name) }
+            sub_klass&.each_field&.none? do |name, field|
+              field.required? && !data.value.key?(name) && name != :Type
+            end
           klass = sub_klass
         end
       end
 
       klass ||= if data.stream
@@ -408,31 +426,38 @@
     # :call-seq:
     #    doc.register_listener(name, callable)             -> callable
     #    doc.register_listener(name) {|*args| block}       -> block
     #
     # Registers the given listener for the message +name+.
+    #
+    # If +callable+ is provided, it needs to be an Object responding to #call. Otherwise the block
+    # has to be provided. The arguments that are provided to the #call method depend on the message.
+    #
+    # See: dispatch_message
     def register_listener(name, callable = nil, &block)
       callable ||= block
       (@listeners[name] ||= []) << callable
       callable
     end
 
     # Dispatches the message +name+ with the given arguments to all registered listeners.
     #
     # See the main Document documentation for an overview of messages that are used by HexaPDF
     # itself.
+    #
+    # See: register_listener
     def dispatch_message(name, *args)
       @listeners[name]&.each {|obj| obj.call(*args) }
     end
 
     UNSET = ::Object.new # :nordoc:
 
     # Caches and returns the given +value+ or the value of the given block using the given
-    # +pdf_data+ and +key+ arguments as composite cache key. If a cached value already exists and
-    # +update+ is +false+, the cached value is just returned.
+    # +pdf_data+ and +key+ arguments as composite cache key.
     #
-    # Set +update+ to +true+ to force an update of the cached value.
+    # If a cached value already exists and +update+ is +false+, the cached value is just returned.
+    # If +update+ is set to +true+, an update of the cached value is forced.
     #
     # This facility can be used to cache expensive operations in PDF objects that are easy to
     # compute again.
     #
     # Use #clear_cache to clear the cache if necessary.
@@ -442,44 +467,47 @@
     end
 
     # Returns +true+ if there is a value cached for the composite key consisting of the given
     # +pdf_data+ and +key+ objects.
     #
-    # Also see: #cache
+    # See: #cache
     def cached?(pdf_data, key)
       @cache.key?(pdf_data) && @cache[pdf_data].key?(key)
     end
 
     # Clears all cached data or, if a Object::PDFData object is given, just the cache for this one
     # object.
     #
     # It is *not* recommended to clear the whole cache! Better clear the cache for individual PDF
     # objects!
     #
-    # Also see: #cache
+    # See: #cache, #cached?
     def clear_cache(pdf_data = nil)
       pdf_data ? @cache[pdf_data].clear : @cache.clear
     end
 
-    # Returns the Pages object that provides convenience methods for working with pages.
+    # Returns the Pages object that provides convenience methods for working with the pages of the
+    # PDF file.
     #
-    # Also see: HexaPDF::Type::PageTreeNode
+    # See: Pages, Type::PageTreeNode
     def pages
       @pages ||= Pages.new(self)
     end
 
-    # Returns the Images object that provides convenience methods for working with images.
+    # Returns the Images object that provides convenience methods for working with images (e.g.
+    # adding them to the PDF or listing them).
     def images
       @images ||= Images.new(self)
     end
 
-    # Returns the Files object that provides convenience methods for working with files.
+    # Returns the Files object that provides convenience methods for working with embedded files.
     def files
       @files ||= Files.new(self)
     end
 
-    # Returns the Fonts object that provides convenience methods for working with fonts.
+    # Returns the Fonts object that provides convenience methods for working with the fonts used in
+    # the PDF file.
     def fonts
       @fonts ||= Fonts.new(self)
     end
 
     # Returns the Destinations object that provides convenience methods for working with destination
@@ -494,59 +522,67 @@
       @layout ||= Layout.new(self)
     end
 
     # Returns the main AcroForm object for dealing with interactive forms.
     #
-    # See HexaPDF::Type::Catalog#acro_form for details on the arguments.
+    # The meaning of the +create+ argument is detailed at Type::Catalog#acro_form.
+    #
+    # See: Type::AcroForm::Form
     def acro_form(create: false)
       catalog.acro_form(create: create)
     end
 
-    # Returns the main document outline object.
+    # Returns the entry object to the document outline (a.k.a. bookmarks).
     #
-    # See HexaPDF::Type::Outline for details.
+    # See: Type::Outline
     def outline
       catalog.outline
     end
 
     # Executes the given task and returns its result.
     #
     # Tasks provide an extensible way for performing operations on a PDF document without
     # cluttering the Document interface.
     #
-    # See Task for more information.
+    # See: Task
     def task(name, **opts, &block)
       task = config.constantize('task.map', name) do
         raise HexaPDF::Error, "No task named '#{name}' is available"
       end
       task.call(self, **opts, &block)
     end
 
     # Returns the trailer dictionary for the document.
+    #
+    # See: Type::Trailer
     def trailer
       @revisions.current.trailer
     end
 
     # Returns the document's catalog, the root of the object tree.
+    #
+    # See: Type::Catalog
     def catalog
       trailer.catalog
     end
 
     # Returns the PDF document's version as string (e.g. '1.4').
     #
     # This method takes the file header version and the catalog's /Version key into account. If a
     # version has been set manually and the catalog's /Version key refers to a later version, the
     # later version is used.
     #
-    # See: PDF1.7 s7.2.2
+    # See: PDF2.0 s7.2.2
     def version
       catalog_version = (catalog[:Version] || '1.0').to_s
       (@version < catalog_version ? catalog_version : @version)
     end
 
-    # Sets the version of the PDF document. The argument must be a string in the format 'M.N'
-    # where M is the major version and N the minor version (e.g. '1.4' or '2.0').
+    # Sets the version of the PDF document.
+    #
+    # The argument +value+ must be a string in the format 'M.N' where M is the major version and N
+    # the minor version (e.g. '1.4' or '2.0').
     def version=(value)
       raise ArgumentError, "PDF version must follow format M.N" unless value.to_s.match?(/\A\d\.\d\z/)
       @version = value.to_s
     end
 
@@ -555,23 +591,22 @@
       !trailer[:Encrypt].nil?
     end
 
     # Encrypts the document.
     #
-    # This is done by setting up a security handler for this purpose and populating the trailer's
-    # Encrypt dictionary accordingly. The actual encryption, however, is only done when writing the
-    # document.
+    # Encryption is done by setting up a security handler for this purpose and populating the
+    # trailer's Encrypt dictionary accordingly. The actual encryption, however, is only done when
+    # writing the document.
     #
     # The security handler used for encrypting is selected via the +name+ argument. All other
     # arguments are passed on the security handler.
     #
     # If the document should not be encrypted, the +name+ argument has to be set to +nil+. This
     # removes the security handler and deletes the trailer's Encrypt dictionary.
     #
-    # See: HexaPDF::Encryption::SecurityHandler#set_up_encryption and
-    # HexaPDF::Encryption::StandardSecurityHandler::EncryptionOptions for possible encryption
-    # options.
+    # See: Encryption::SecurityHandler#set_up_encryption and
+    # Encryption::StandardSecurityHandler::EncryptionOptions for possible encryption options.
     def encrypt(name: :Standard, **options)
       if name.nil?
         trailer.delete(:Encrypt)
         @security_handler = nil
       else
@@ -603,32 +638,31 @@
     end
 
     # Signs the document and writes it to the given file or IO object.
     #
     # For details on the arguments +file_or_io+, +signature+ and +write_options+ see
-    # HexaPDF::DigitalSignature::Signatures#add.
+    # DigitalSignature::Signatures#add.
     #
     # The signing handler to be used is determined by the +handler+ argument together with the rest
-    # of the keyword arguments (see HexaPDF::DigitalSignature::Signatures#signing_handler for
-    # details).
+    # of the keyword arguments (see DigitalSignature::Signatures#signing_handler for details).
     #
-    # If not changed, the default signing handler is
-    # HexaPDF::DigitalSignature::Signing::DefaultHandler.
+    # If not changed, the default signing handler is DigitalSignature::Signing::DefaultHandler.
     #
-    # *Note*: Once signing is done the document cannot be changed anymore since it was written. If a
-    # document needs to be signed multiple times, it needs to be loaded again after writing.
+    # *Note*: Once signing is done the document cannot be changed anymore since it was written
+    # during the signing process. If a document needs to be signed multiple times, it needs to be
+    # loaded again afterwards.
     def sign(file_or_io, handler: :default, signature: nil, write_options: {}, **handler_options)
       handler = signatures.signing_handler(name: handler, **handler_options)
       signatures.add(file_or_io, handler, signature: signature, write_options: write_options)
     end
 
     # Validates all current objects, or, if +only_loaded+ is +true+, only loaded objects, with
     # optional auto-correction, and returns +true+ if everything is fine.
     #
     # If a block is given, it is called on validation problems.
     #
-    # See HexaPDF::Object#validate for more information.
+    # See Object#validate for more information.
     def validate(auto_correct: true, only_loaded: false, &block) #:yield: msg, correctable, object
       result = trailer.validate(auto_correct: auto_correct, &block)
       each(only_loaded: only_loaded) do |obj|
         result &&= obj.validate(auto_correct: auto_correct, &block)
       end
@@ -649,10 +683,10 @@
     # incremental::
     #   Use the incremental writing mode which just adds a new revision to an existing document.
     #   This is needed, for example, when modifying a signed PDF and the original signature should
     #   stay valid.
     #
-    #   See: PDF1.7 s7.5.6
+    #   See: PDF2.0 s7.5.6
     #
     # validate::
     #   Validates the document and raises an error if an uncorrectable problem is found.
     #
     # update_fields::