lib/origami/pdf.rb in origami-1.2.2 vs lib/origami/pdf.rb in origami-1.2.3

- old
+ new

@@ -59,12 +59,12 @@ require 'origami/parsers/pdf' module Origami - VERSION = "1.2.2" - REVISION = "$Revision: rev 135/, 2011/10/17 11:59:41 $" #:nodoc: + VERSION = "1.2.3" + REVISION = "$Revision: rev 143/, 2011/10/20 16:22:40 $" #:nodoc: # # Global options for Origami. # OPTIONS = @@ -263,17 +263,10 @@ @parser = parser self end # - # Returns the virtual file size as it would be taking on disk. - # - def filesize - self.to_bin(:rebuildxrefs => false).size - end - - # # Saves the current document. # _filename_:: The path where to save this PDF. # def save(path, params = {}) @@ -299,13 +292,13 @@ fd = File.open(path, 'w').binmode end intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i self.delinearize! if options[:delinearize] and self.is_linearized? - self.compile(options) if options[:recompile] + compile(options) if options[:recompile] - fd.write self.to_bin(options) + fd.write output(options) fd.close self end alias saveas save @@ -540,10 +533,134 @@ object.reference end # + # Ends the current Revision, and starts a new one. + # + def add_new_revision + + root = @revisions.last.trailer[:Root] unless @revisions.empty? + + @revisions << Revision.new(self) + @revisions.last.trailer = Trailer.new + @revisions.last.trailer.Root = root + + self + end + + # + # Removes a whole document revision. + # _index_:: Revision index, first is 0. + # + def remove_revision(index) + if index < 0 or index > @revisions.size + raise IndexError, "Not a valid revision index" + end + + if @revisions.size == 1 + raise InvalidPDFError, "Cannot remove last revision" + end + + @revisions.delete_at(index) + self + end + + # + # Looking for an object present at a specified file offset. + # + def get_object_by_offset(offset) #:nodoc: + self.indirect_objects.find { |obj| obj.file_offset == offset } + end + + # + # Remove an object. + # + def delete_object(no, generation = 0) + + case no + when Reference + target = no + when ::Integer + target = Reference.new(no, generation) + else + raise TypeError, "Invalid parameter type : #{no.class}" + end + + @revisions.each do |rev| + rev.body.delete(target) + end + + end + + # + # Search for an indirect object in the document. + # _no_:: Reference or number of the object. + # _generation_:: Object generation. + # + def get_object(no, generation = 0, use_xrefstm = true) #:nodoc: + case no + when Reference + target = no + when ::Integer + target = Reference.new(no, generation) + when Origami::Object + return no + else + raise TypeError, "Invalid parameter type : #{no.class}" + end + + set = indirect_objects_table + + # + # Search through accessible indirect objects. + # + if set.include?(target) + set[target] + elsif use_xrefstm == true + # Look into XRef streams. + + if @revisions.last.has_xrefstm? + xrefstm = @revisions.last.xrefstm + + done = [] + while xrefstm.is_a?(XRefStream) and not done.include?(xrefstm) + xref = xrefstm.find(target.refno) + + # + # We found a matching XRef. + # + if xref.is_a?(XRefToCompressedObj) + objstm = get_object(xref.objstmno, 0, false) + + object = objstm.extract_by_index(xref.index) + if object.is_a?(Origami::Object) and object.no == target.refno + return object + else + return objstm.extract(target.refno) + end + elsif xrefstm.has_field?(:Prev) + done << xrefstm + xrefstm = get_object_by_offset(xrefstm.Prev) + else + break + end + end + end + + # + # Lastly search directly into Object streams (might be very slow). + # + stream = set.values.find_all{|obj| obj.is_a?(ObjectStream)}.find do |objstm| objstm.include?(target.refno) end + stream && stream.extract(target.refno) + end + + end + + alias :[] :get_object + + # # Returns a new number/generation for future object. # def alloc_new_object_number no = 1 @@ -559,11 +676,42 @@ no = allocated.max + 1 unless allocated.empty? [ no, 0 ] end + ########################## + private + ########################## + # + # Compute and update XRef::Section for each Revision. + # + def rebuildxrefs + + size = 0 + startxref = @header.to_s.size + + @revisions.each do |revision| + + revision.objects.each do |object| + startxref += object.to_s.size + end + + size += revision.body.size + revision.xreftable = buildxrefs(revision.objects) + + revision.trailer ||= Trailer.new + revision.trailer.Size = size + 1 + revision.trailer.startxref = startxref + + startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size + end + + self + end + + # # This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file. # * Allocates objects references. # * Sets some objects missing required values. # def compile(options = {}) @@ -594,15 +742,117 @@ self end # + # Cleans the document from its references. + # Indirects objects are made direct whenever possible. + # TODO: Circuit-checking to avoid infinite induction + # + def logicalize #:nodoc: + + fail "Not yet supported" + + processed = [] + + def convert(root) #:nodoc: + + replaced = [] + if root.is_a?(Dictionary) or root.is_a?(Array) + + root.each { |obj| + convert(obj) + } + + root.map! { |obj| + if obj.is_a?(Reference) + target = obj.solve + # Streams can't be direct objects + if target.is_a?(Stream) + obj + else + replaced << obj + target + end + else + obj + end + } + + end + + replaced + end + + @revisions.each do |revision| + revision.objects.each do |obj| + processed.concat(convert(obj)) + end + end + + end + + # + # Converts a logical PDF view into a physical view ready for writing. + # + def physicalize + + # + # Indirect objects are added to the revision and assigned numbers. + # + def build(obj, revision) #:nodoc: + + # + # Finalize any subobjects before building the stream. + # + if obj.is_a?(ObjectStream) + obj.each do |subobj| + build(subobj, revision) + end + end + + obj.pre_build + + if obj.is_a?(Dictionary) or obj.is_a?(Array) + + obj.map! do |subobj| + if subobj.is_indirect? + if get_object(subobj.reference) + subobj.reference + else + ref = add_to_revision(subobj, revision) + build(subobj, revision) + ref + end + else + subobj + end + end + + obj.each do |subobj| + build(subobj, revision) + end + + elsif obj.is_a?(Stream) + build(obj.dictionary, revision) + end + + obj.post_build + + end + + indirect_objects_by_rev.each do |obj, revision| + build(obj, revision) + end + + self + end + + # # Returns the final binary representation of the current document. - # _rebuildxrefs_:: Computes xrefs while writing objects (default true). - # _obfuscate_:: Do some basic syntactic object obfuscation. # - def to_bin(params = {}) + def output(params = {}) has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)} options = { @@ -664,11 +914,11 @@ end objset = rev.objects objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm| - objset |= objstm.objects + objset.concat objstm.objects end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true # For each object, in number order objset.sort.each do |obj| @@ -773,276 +1023,21 @@ bin end # - # Compute and update XRef::Section for each Revision. - # - def rebuildxrefs - - size = 0 - startxref = @header.to_s.size - - @revisions.each do |revision| - - revision.objects.each do |object| - startxref += object.to_s.size - end - - size += revision.body.size - revision.xreftable = buildxrefs(revision.objects) - - revision.trailer ||= Trailer.new - revision.trailer.Size = size + 1 - revision.trailer.startxref = startxref - - startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size - end - - self - end - - # - # Ends the current Revision, and starts a new one. - # - def add_new_revision - - root = @revisions.last.trailer[:Root] unless @revisions.empty? - - @revisions << Revision.new(self) - @revisions.last.trailer = Trailer.new - @revisions.last.trailer.Root = root - - self - end - - # - # Removes a whole document revision. - # _index_:: Revision index, first is 0. - # - def remove_revision(index) - if index < 0 or index > @revisions.size - raise IndexError, "Not a valid revision index" - end - - if @revisions.size == 1 - raise InvalidPDFError, "Cannot remove last revision" - end - - @revisions.delete_at(index) - self - end - - # - # Looking for an object present at a specified file offset. - # - def get_object_by_offset(offset) #:nodoc: - self.indirect_objects.find { |obj| obj.file_offset == offset } - end - - # - # Remove an object. - # - def delete_object(no, generation = 0) - - case no - when Reference - target = no - when ::Integer - target = Reference.new(no, generation) - else - raise TypeError, "Invalid parameter type : #{no.class}" - end - - @revisions.each do |rev| - rev.body.delete(target) - end - - end - - # - # Search for an indirect object in the document. - # _no_:: Reference or number of the object. - # _generation_:: Object generation. - # - def get_object(no, generation = 0, use_xrefstm = true) #:nodoc: - case no - when Reference - target = no - when ::Integer - target = Reference.new(no, generation) - when Origami::Object - return no - else - raise TypeError, "Invalid parameter type : #{no.class}" - end - - set = indirect_objects_table - - # - # Search through accessible indirect objects. - # - if set.include?(target) - set[target] - elsif use_xrefstm == true - # Look into XRef streams. - - if @revisions.last.has_xrefstm? - xrefstm = @revisions.last.xrefstm - - done = [] - while xrefstm.is_a?(XRefStream) and not done.include?(xrefstm) - xref = xrefstm.find(target.refno) - - # - # We found a matching XRef. - # - if xref.is_a?(XRefToCompressedObj) - objstm = get_object(xref.objstmno, 0, false) - - object = objstm.extract_by_index(xref.index) - if object.is_a?(Origami::Object) and object.no == target.refno - return object - else - return objstm.extract(target.refno) - end - elsif xrefstm.has_field?(:Prev) - done << xrefstm - xrefstm = get_object_by_offset(xrefstm.Prev) - else - break - end - end - end - - # - # Lastly search directly into Object streams (might be very slow). - # - stream = set.values.find_all{|obj| obj.is_a?(ObjectStream)}.find do |objstm| objstm.include?(target.refno) end - stream && stream.extract(target.refno) - end - - end - - alias :[] :get_object - - # - # Converts a logical PDF view into a physical view ready for writing. - # - def physicalize - - # - # Indirect objects are added to the revision and assigned numbers. - # - def build(obj, revision) #:nodoc: - - # - # Finalize any subobjects before building the stream. - # - if obj.is_a?(ObjectStream) - obj.each do |subobj| - build(subobj, revision) - end - end - - obj.pre_build - - if obj.is_a?(Dictionary) or obj.is_a?(Array) - - obj.map! do |subobj| - if subobj.is_indirect? - if get_object(subobj.reference) - subobj.reference - else - ref = add_to_revision(subobj, revision) - build(subobj, revision) - ref - end - else - subobj - end - end - - obj.each do |subobj| - build(subobj, revision) - end - - elsif obj.is_a?(Stream) - build(obj.dictionary, revision) - end - - obj.post_build - - end - - indirect_objects_by_rev.each do |obj, revision| - build(obj, revision) - end - - self - end - - # - # Cleans the document from its references. - # Indirects objects are made direct whenever possible. - # TODO: Circuit-checking to avoid infinite induction - # - def logicalize #:nodoc: - - fail "Not yet supported" - - processed = [] - - def convert(root) #:nodoc: - - replaced = [] - if root.is_a?(Dictionary) or root.is_a?(Array) - - root.each { |obj| - convert(obj) - } - - root.map! { |obj| - if obj.is_a?(Reference) - target = obj.solve - # Streams can't be direct objects - if target.is_a?(Stream) - obj - else - replaced << obj - target - end - else - obj - end - } - - end - - replaced - end - - @revisions.each do |revision| - revision.objects.each do |obj| - processed.concat(convert(obj)) - end - end - - end - - ########################## - private - ########################## - - # # Instanciates basic structures required for a valid PDF file. # def init catalog = (self.Catalog = (get_doc_attr(:Root) || Catalog.new)) catalog.Pages = PageTreeNode.new.set_indirect(true) @revisions.last.trailer.Root = catalog.reference self + end + + def filesize #:nodoc: + output(:rebuildxrefs => false).size end def version_required #:nodoc: max = [ 1.0, 0 ]