# encoding: utf-8

# prawn/core/object_store.rb : Implements PDF object repository for Prawn
#
# Copyright August 2009, Brad Ediger.  All Rights Reserved.
#
# This is free software. Please see the LICENSE and COPYING files for details.

require 'stringio'
require 'pdf/reader'

module Prawn
  module Core
    # Repository of the Prawn::Core::Reference objects that make up a
    # document. References are kept in a hash keyed by identifier, plus an
    # ordered list of identifiers that drives iteration (see #each).
    #
    # The store always has an info reference and a root (catalog) reference;
    # the root's :Pages entry holds the page tree.
    class ObjectStore #:nodoc:
      include Enumerable

      # PDF version reported by a loaded template (set only by load_file).
      attr_reader :min_version

      BASE_OBJECTS = %w[info pages root].freeze

      # Builds a new store.
      #
      # opts[:template]:: IO-like object or filename of a PDF to load as the
      #                   starting point of this store (see load_file).
      # opts[:info]::     data for the info reference; defaults to {}. Only
      #                   used when the template did not already supply one.
      def initialize(opts = {})
        @objects = {}
        @identifiers = []

        load_file(opts[:template]) if opts[:template]

        # load_file may already have set @info / @root from the template;
        # only create them when they are still missing.
        @info ||= ref(opts[:info] || {}).identifier
        @root ||= ref(:Type => :Catalog).identifier
        if pages.nil?
          root.data[:Pages] = ref(:Type => :Pages, :Count => 0, :Kids => [])
        end
      end

      # Creates a new reference for +data+ using the next free identifier
      # (size + 1), stores it and returns it.
      def ref(data, &block)
        push(size + 1, data, &block)
      end

      # The info reference.
      def info
        @objects[@info]
      end

      # The root (catalog) reference.
      def root
        @objects[@root]
      end

      # The value stored under :Pages in the root's data.
      def pages
        root.data[:Pages]
      end

      # The :Count entry of the pages object.
      def page_count
        pages.data[:Count]
      end

      # Adds the given reference to the store and returns the reference object.
      # If the object provided is not a Prawn::Core::Reference, one is created
      # from the arguments provided.
      #
      def push(*args, &block)
        reference = if args.first.is_a?(Prawn::Core::Reference)
          args.first
        else
          Prawn::Core::Reference.new(*args, &block)
        end

        @objects[reference.identifier] = reference
        @identifiers << reference.identifier
        reference
      end

      alias_method :<<, :push

      # Yields every stored reference, in the order the identifiers were added.
      def each
        @identifiers.each do |id|
          yield @objects[id]
        end
      end

      # Returns the reference stored under +id+, or nil if there is none.
      def [](id)
        @objects[id]
      end

      # Number of identifiers held by the store.
      def size
        @identifiers.size
      end
      alias_method :length, :size

      # Drops every object that is not reachable from the root or info
      # references and renumbers the survivors from 1 so that no gaps remain.
      def compact
        # Clear live markers
        each { |o| o.live = false }

        # Recursively mark reachable objects live, starting from the roots
        # (the only objects referenced in the trailer)
        root.mark_live
        info.mark_live

        # Renumber live objects to eliminate gaps (shrink the xref table)
        if @objects.any? { |_, o| !o.live }
          # @root and @info store identifiers, not objects. Hold on to the
          # objects so the identifiers can be refreshed after renumbering.
          root_obj = root
          info_obj = info

          new_id = 1
          new_objects = {}
          new_identifiers = []

          each do |obj|
            if obj.live
              obj.identifier = new_id
              new_objects[new_id] = obj
              new_identifiers << new_id
              new_id += 1
            end
          end

          @objects = new_objects
          @identifiers = new_identifiers

          # Keep the root/info lookups pointing at the same objects even if
          # renumbering moved them.
          @root = root_obj.identifier
          @info = info_obj.identifier
        end
      end

      # returns the object ID for a particular page in the document. Pages
      # are indexed starting at 1 (not 0!). Negative values index from the
      # end of the page list.
      #
      #   object_id_for_page(1)
      #   => 5
      #   object_id_for_page(10)
      #   => 87
      #   object_id_for_page(-11)
      #   => 17
      #
      def object_id_for_page(k)
        # Convert 1-based page numbers to array indexes; negative values are
        # already valid from-the-end indexes and are left alone.
        k -= 1 if k > 0
        flat_page_ids = get_page_objects(pages).flatten
        flat_page_ids[k]
      end

      # imports all objects required to render a page from another PDF. The
      # objects are added to the current object store, but NOT linked
      # anywhere.
      #
      # The object ID of the root Page object is returned, it's up to the
      # calling code to link that into the document structure somewhere. If
      # this isn't done the imported objects will just be removed when the
      # store is compacted.
      #
      # Imports nothing and returns nil if the requested page number doesn't
      # exist. page_num is 1 indexed, so 1 indicates the first page.
      #
      # +input+ is either an IO-like object (responds to seek and read) or a
      # filename. Raises ArgumentError for any other input and
      # Prawn::Errors::TemplateError when PDF::Reader cannot process the file.
      #
      def import_page(input, page_num)
        @loaded_objects = {}

        # Pages are 1-indexed. Without this guard, 0 or a negative number
        # would become a from-the-end array index below and import the wrong
        # page instead of returning nil.
        return nil if page_num < 1

        template_id = indexed_template(input, page_num)
        return template_id if template_id

        io = if input.respond_to?(:seek) && input.respond_to?(:read)
          input
        elsif File.file?(input.to_s)
          StringIO.new(File.binread(input.to_s))
        else
          raise ArgumentError, "input must be an IO-like object or a filename"
        end

        hash = indexed_hash(input, io)
        page_ref = hash.page_references[page_num - 1]
        return nil if page_ref.nil?

        index_template(input, page_num, load_object_graph(hash, page_ref).identifier)
      rescue PDF::Reader::MalformedPDFError, PDF::Reader::InvalidObjectError
        msg = "Error reading template file. If you are sure it's a valid PDF, it may be a bug."
        raise Prawn::Errors::TemplateError, msg
      rescue PDF::Reader::UnsupportedFeatureError
        msg = "Template file contains unsupported PDF features"
        raise Prawn::Errors::TemplateError, msg
      end

      private

      # An index for page templates so that their loaded object graph
      # can be reused without multiple loading
      def template_index
        @template_index ||= {}
      end

      # An index for the read object hash of a pdf template so that the
      # object hash does not need to be parsed multiple times when using
      # different pages of the pdf as page templates
      def hash_index
        @hash_index ||= {}
      end

      # returns the indexed object graph identifier for a template page if
      # it exists
      def indexed_template(input, page_number)
        key = indexing_key(input)
        template_index[key] && template_index[key][page_number]
      end

      # indexes the identifier for a page from a template. An identifier that
      # is already indexed for the page is kept; the indexed value is returned.
      def index_template(input, page_number, id)
        (template_index[indexing_key(input)] ||= {})[page_number] ||= id
      end

      # reads and indexes a new IO for a template
      # if the IO has been indexed already then the parsed object hash
      # is returned directly
      def indexed_hash(input, io)
        hash_index[indexing_key(input)] ||= PDF::Reader::ObjectHash.new(io)
      end

      # the index key for the input.
      # uses object_id so that both a string filename or an IO stream can be
      # indexed and reused provided the same object gets used in multiple page
      # template calls.
      def indexing_key(input)
        input.object_id
      end

      # returns a nested array of object IDs for all pages in this object store.
      # A :Page object yields its own identifier, a :Pages object yields an
      # array built from its :Kids, and any other type yields nil.
      #
      def get_page_objects(obj)
        if obj.data[:Type] == :Page
          obj.identifier
        elsif obj.data[:Type] == :Pages
          obj.data[:Kids].map { |kid| get_page_objects(kid) }
        end
      end

      # takes a source PDF and uses it as a template for this document.
      #
      # +template+ is an IO-like object (responds to seek and read) or the
      # name of an existing file; anything else raises ArgumentError.
      # Sets @min_version from the template and, when the trailer provides
      # them, @info and @root. Raises Prawn::Errors::TemplateError for
      # encrypted files and for files PDF::Reader cannot process.
      #
      def load_file(template)
        unless (template.respond_to?(:seek) && template.respond_to?(:read)) || File.file?(template)
          raise ArgumentError, "#{template} does not exist"
        end

        hash = PDF::Reader::ObjectHash.new(template)
        src_info = hash.trailer[:Info]
        src_root = hash.trailer[:Root]
        @min_version = hash.pdf_version.to_f

        if hash.trailer[:Encrypt]
          msg = "Template file is an encrypted PDF, it can't be used as a template"
          raise Prawn::Errors::TemplateError, msg
        end

        if src_info
          @info = load_object_graph(hash, src_info).identifier
        end

        if src_root
          @root = load_object_graph(hash, src_root).identifier
        end
      rescue PDF::Reader::MalformedPDFError, PDF::Reader::InvalidObjectError
        msg = "Error reading template file. If you are sure it's a valid PDF, it may be a bug."
        raise Prawn::Errors::TemplateError, msg
      rescue PDF::Reader::UnsupportedFeatureError
        msg = "Template file contains unsupported PDF features"
        raise Prawn::Errors::TemplateError, msg
      end

      # recurse down an object graph from a source PDF, importing all the
      # indirect objects we find.
      #
      # hash is the PDF::Reader::ObjectHash to extract objects from, object is
      # the object to extract.
      #
      # Returns the imported counterpart of +object+: a reference from this
      # store for a PDF::Reader::Reference, otherwise the (possibly converted)
      # value itself. Hashes are updated in place.
      #
      def load_object_graph(hash, object)
        @loaded_objects ||= {}
        case object
        when ::Hash then
          object.each { |key, value| object[key] = load_object_graph(hash, value) }
          object
        when Array then
          object.map { |item| load_object_graph(hash, item) }
        when PDF::Reader::Reference then
          unless @loaded_objects.has_key?(object.id)
            # Register the new reference before recursing so that a cycle
            # back to this object finds it instead of recursing forever.
            @loaded_objects[object.id] = ref(nil)
            new_obj = load_object_graph(hash, hash[object])
            if new_obj.kind_of?(PDF::Reader::Stream)
              stream_dict = load_object_graph(hash, new_obj.hash)
              @loaded_objects[object.id].data = stream_dict
              @loaded_objects[object.id] << new_obj.data
            else
              @loaded_objects[object.id].data = new_obj
            end
          end
          @loaded_objects[object.id]
        when PDF::Reader::Stream
          # Stream is a subclass of string, so this is here to prevent the stream
          # being wrapped in a LiteralString
          object
        when String
          is_utf8?(object) ? object : Prawn::Core::ByteString.new(object)
        else
          object
        end
      end

      ruby_18 do
        # True when +str+ can be unpacked as a sequence of UTF-8 characters.
        def is_utf8?(str)
          begin
            str.unpack("U*")
            true
          rescue
            false
          end
        end
      end

      ruby_19 do
        # True when +str+ is valid UTF-8. Note that force_encoding re-tags
        # +str+ itself as UTF-8 (in place) before the check.
        def is_utf8?(str)
          str.force_encoding("utf-8")
          str.valid_encoding?
        end
      end
    end
  end
end