# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2020 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
#++

require 'uri'
require 'hexapdf/dictionary'
require 'hexapdf/stream'

module HexaPDF
  module Type

    # Represents a file specification dictionary.
    #
    # File specifications are used to refer to other files or URLs from within a PDF file. Simple
    # file specifications are just strings. However, they are automatically converted on access to
    # a full file specification to provide a unified interface.
    #
    # == Working with File Specifications
    #
    # A file specification may refer to a file or a URL. This can easily be checked with #url?.
    # Independent of whether the file specification refers to a URL or a file, the #path method
    # returns the "best" usable path for it.
    #
    # Modifying a file specification should be done via the #path= and #url= methods as they
    # ensure that no obsolescent entries are used and the file specification is consistent.
    #
    # Finally, since embedded files in a PDF document are always linked to a file specification it
    # is useful to provide embedding/unembedding operations in this class, see #embed and
    # #unembed.
    #
    # See: PDF1.7 s7.11
    class FileSpecification < Dictionary

      # The type used for the /EF field of a FileSpecification
      class EFDictionary < Dictionary

        define_type :XXFilespecEFDictionary

        define_field :F,    type: :EmbeddedFile
        define_field :UF,   type: :EmbeddedFile
        define_field :DOS,  type: :EmbeddedFile
        define_field :Mac,  type: :EmbeddedFile
        define_field :Unix, type: :EmbeddedFile

      end

      define_type :Filespec

      define_field :Type, type: Symbol, default: type, required: true
      define_field :FS,   type: Symbol
      define_field :F,    type: PDFByteString
      define_field :UF,   type: String, version: '1.7'
      define_field :DOS,  type: PDFByteString
      define_field :Mac,  type: PDFByteString
      define_field :Unix, type: PDFByteString
      define_field :ID,   type: PDFArray
      define_field :V,    type: Boolean, version: '1.2'
      define_field :EF,   type: :XXFilespecEFDictionary, version: '1.7'
      define_field :RF,   type: Dictionary, version: '1.3'
      define_field :Desc, type: String, version: '1.6'
      define_field :CI,   type: Dictionary, version: '1.7'

      # Returns +true+ if this file specification references a URL and not a file.
      def url?
        self[:FS] == :URL
      end

      # Returns the path for the referenced file or URL. An empty string is returned if no file
      # specification string is set.
      #
      # If multiple file specification strings are available, the fields are searched in the
      # following order and the first one with a value is used: /UF, /F, /Unix, /Mac, /DOS.
      #
      # The encoding of the returned path string is either UTF-8 (for /UF) or BINARY (for /F,
      # /Unix, /Mac and /DOS).
      #
      # The returned string is always a copy, so modifying it does not change the stored value.
      def path
        tmp = (self[:UF] || self[:F] || self[:Unix] || self[:Mac] || self[:DOS] || '').dup
        tmp.gsub!(/\\\//, "/") # PDF1.7 s7.11.2.1 but / in filename is interpreted as separator!
        tmp.tr!("\\", "/") # always use slashes instead of back-slashes!
        tmp
      end

      # Sets the file specification string to the given filename.
      #
      # Since the /Unix, /Mac and /DOS fields are obsolescent, only the /F and /UF fields are set.
      # The /FS entry is removed as well, so the specification no longer counts as a URL.
      def path=(filename)
        self[:UF] = self[:F] = filename
        delete(:FS)
        delete(:Unix)
        delete(:Mac)
        delete(:DOS)
      end

      # Sets the file specification string to the given URL and updates the file system entry
      # appropriately.
      #
      # The provided URL needs to be in an RFC1738 compliant string representation. If not, a
      # HexaPDF::Error is raised.
      def url=(url)
        begin
          URI(url) # only used for validation, the parsed result is not needed
        rescue URI::InvalidURIError => e
          raise HexaPDF::Error, e.message
        end
        # #path= deletes /FS, so /FS has to be set afterwards.
        self.path = url
        self[:FS] = :URL
      end

      # Returns +true+ if this file specification contains an embedded file.
      #
      # See: #embedded_file_stream
      def embedded_file?
        key?(:EF) && !self[:EF].empty?
      end

      # Returns the embedded file associated with this file specification, or +nil+ if this file
      # specification references no embedded file.
      #
      # If there are multiple possible embedded files, the /EF fields are searched in the following
      # order and the first one with a value is used: /UF, /F, /Unix, /Mac, /DOS.
      def embedded_file_stream
        return unless key?(:EF)
        ef = self[:EF]
        ef[:UF] || ef[:F] || ef[:Unix] || ef[:Mac] || ef[:DOS]
      end

      # :call-seq:
      #   file_spec.embed(filename, name: File.basename(filename), register: true)   -> ef_stream
      #   file_spec.embed(io, name:, register: true)                                 -> ef_stream
      #
      # Embeds the given file or IO stream into the PDF file, sets the path accordingly and returns
      # the created stream object.
      #
      # If a file is given, the +name+ option defaults to the basename of the file. However, if an
      # IO object is given, the +name+ argument is mandatory and an ArgumentError is raised if it
      # is missing.
      #
      # If there already was a file embedded for this file specification, it is unembedded first.
      #
      # The embedded file stream automatically uses the FlateDecode filter for compressing the
      # embedded file.
      #
      # Options:
      #
      # name::
      #     The name that should be used as path value and when registering.
      #
      # register::
      #     Specifies whether the embedded file will be added to the EmbeddedFiles name tree under
      #     the +name+. If the name is already taken, its value is overwritten.
      #
      # The file has to be available until the PDF document gets written because reading and
      # writing is done lazily.
      def embed(file_or_io, name: nil, register: true)
        name ||= File.basename(file_or_io) if file_or_io.kind_of?(String)
        if name.nil?
          raise ArgumentError, "The name argument is mandatory when given an IO object"
        end

        # Gather the file metadata *before* touching any state: if the file is missing or
        # unreadable this raises while the file specification and the document are still
        # unchanged (no lost previous embedding, no orphaned stream object).
        stat = if file_or_io.kind_of?(String)
                 File.stat(file_or_io)
               elsif file_or_io.respond_to?(:stat)
                 file_or_io.stat
               end

        unembed
        self.path = name

        self[:EF] ||= {}
        # Both /UF and /F point to the very same embedded file stream object.
        ef_stream = self[:EF][:UF] = self[:EF][:F] = document.add({Type: :EmbeddedFile})
        if stat
          ef_stream[:Params] = {Size: stat.size, CreationDate: stat.ctime, ModDate: stat.mtime}
        end
        ef_stream.set_filter(:FlateDecode)
        ef_stream.stream = HexaPDF::StreamData.new(file_or_io)

        if register
          (document.catalog[:Names] ||= {})[:EmbeddedFiles] ||= {}
          document.catalog[:Names][:EmbeddedFiles].add_entry(name, self)
        end

        ef_stream
      end

      # Deletes any embedded file streams associated with this file specification. A possible entry
      # in the EmbeddedFiles name tree is also deleted.
      #
      # Does nothing if there is no /EF entry. Note that only the stream objects are deleted from
      # the document, the /EF entry of this file specification itself is not removed.
      #
      # NOTE(review): because /EF is kept, #embedded_file? may still report +true+ afterwards --
      # confirm whether /EF should be deleted here as well.
      def unembed
        return unless key?(:EF)
        self[:EF].each {|_, ef_stream| document.delete(ef_stream) }

        if document.catalog.key?(:Names) && document.catalog[:Names].key?(:EmbeddedFiles)
          tree = document.catalog[:Names][:EmbeddedFiles]
          # Collect the matching entries first so the tree is not modified while iterating it.
          tree.each_entry.find_all {|_, spec| spec == self }.each do |(name, _)|
            tree.delete_entry(name)
          end
        end
      end

    end

  end
end