lib/origami/pdf.rb in origami-1.2.2 vs lib/origami/pdf.rb in origami-1.2.3
- old
+ new
@@ -59,12 +59,12 @@
require 'origami/parsers/pdf'
module Origami
- VERSION = "1.2.2"
- REVISION = "$Revision: rev 135/, 2011/10/17 11:59:41 $" #:nodoc:
+ VERSION = "1.2.3"
+ REVISION = "$Revision: rev 143/, 2011/10/20 16:22:40 $" #:nodoc:
#
# Global options for Origami.
#
OPTIONS =
@@ -263,17 +263,10 @@
@parser = parser
self
end
#
- # Returns the virtual file size as it would be taking on disk.
- #
- def filesize
- self.to_bin(:rebuildxrefs => false).size
- end
-
- #
# Saves the current document.
# _path_:: The path where to save this PDF.
#
def save(path, params = {})
@@ -299,13 +292,13 @@
fd = File.open(path, 'w').binmode
end
intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
self.delinearize! if options[:delinearize] and self.is_linearized?
- self.compile(options) if options[:recompile]
+ compile(options) if options[:recompile]
- fd.write self.to_bin(options)
+ fd.write output(options)
fd.close
self
end
alias saveas save
@@ -540,10 +533,134 @@
object.reference
end
#
+ # Ends the current Revision, and starts a new one.
+ #
+ def add_new_revision
+   # Carry the current /Root over to the new revision, when there is one.
+   inherited_root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]
+
+   fresh = Revision.new(self)
+   @revisions << fresh
+   fresh.trailer = Trailer.new
+   fresh.trailer.Root = inherited_root
+   self
+ end
+
+ #
+ # Removes a whole document revision.
+ # _index_:: Revision index, first is 0.
+ #
+ def remove_revision(index)
+   if index < 0 or index >= @revisions.size # valid indices are 0...size
+     raise IndexError, "Not a valid revision index"
+   end
+   # A document must always keep at least one revision.
+   if @revisions.size == 1
+     raise InvalidPDFError, "Cannot remove last revision"
+   end
+
+   @revisions.delete_at(index)
+   self
+ end
+
+ #
+ # Looks up the indirect object located at the given file offset.
+ #
+ def get_object_by_offset(offset) #:nodoc:
+   indirect_objects.find { |candidate| candidate.file_offset == offset }
+ end
+
+ #
+ # Removes an object from the body of every revision.
+ #
+ def delete_object(no, generation = 0)
+   # Normalize the argument into a Reference.
+   target =
+     case no
+     when Reference
+       no
+     when ::Integer
+       Reference.new(no, generation)
+     else
+       raise TypeError, "Invalid parameter type : #{no.class}"
+     end
+
+   # Drop the object from the body of every revision.
+   # The value of this each call is also what the method returns.
+   @revisions.each { |revision| revision.body.delete(target) }
+ end
+
+ #
+ # Search for an indirect object in the document.
+ # _no_:: Reference or number of the object.
+ # _generation_:: Object generation.
+ #
+ def get_object(no, generation = 0, use_xrefstm = true) #:nodoc:
+ # _use_xrefstm_:: when true (default), fall back to XRef streams and object streams if the table lookup fails.
+ # Evaluates to nil when the object is not in the table and use_xrefstm is false, or when no object stream contains it.
+ case no
+ when Reference
+ target = no
+ when ::Integer
+ target = Reference.new(no, generation)
+ when Origami::Object
+ return no # already an object: handed back unchanged, no lookup performed
+ else
+ raise TypeError, "Invalid parameter type : #{no.class}"
+ end
+
+ set = indirect_objects_table
+
+ #
+ # Search through accessible indirect objects.
+ #
+ if set.include?(target)
+ set[target]
+ elsif use_xrefstm == true
+ # Not in the table: walk the XRef streams, starting with the last revision's.
+
+ if @revisions.last.has_xrefstm?
+ xrefstm = @revisions.last.xrefstm
+
+ done = [] # XRef streams already visited; ends the loop if the Prev links cycle
+ while xrefstm.is_a?(XRefStream) and not done.include?(xrefstm)
+ xref = xrefstm.find(target.refno)
+
+ #
+ # We found a matching XRef pointing into an object stream.
+ #
+ if xref.is_a?(XRefToCompressedObj)
+ objstm = get_object(xref.objstmno, 0, false)
+ # NOTE(review): with use_xrefstm = false the call above yields nil when the stream is not in the table; objstm is not nil-checked.
+ object = objstm.extract_by_index(xref.index)
+ if object.is_a?(Origami::Object) and object.no == target.refno
+ return object
+ else # the entry at that index is not the wanted object: look it up by number
+ return objstm.extract(target.refno)
+ end
+ elsif xrefstm.has_field?(:Prev) # no compressed entry here: move on to the previous XRef stream
+ done << xrefstm
+ xrefstm = get_object_by_offset(xrefstm.Prev)
+ else
+ break
+ end
+ end
+ end
+
+ #
+ # Lastly search directly into Object streams (might be very slow).
+ #
+ stream = set.values.find_all{|obj| obj.is_a?(ObjectStream)}.find do |objstm| objstm.include?(target.refno) end
+ stream && stream.extract(target.refno)
+ end
+
+ end
+
+ alias :[] :get_object
+
+ #
# Returns a new number/generation for future object.
#
def alloc_new_object_number
no = 1
@@ -559,11 +676,42 @@
no = allocated.max + 1 unless allocated.empty?
[ no, 0 ]
end
+ ##########################
+ private
+ ##########################
+
#
+ # Compute and update XRef::Section for each Revision.
+ #
+ def rebuildxrefs
+   object_count = 0
+   offset = @header.to_s.size # running byte position, starts after the header
+
+   @revisions.each do |rev|
+     # Advance past every serialized object of this revision.
+     offset = rev.objects.inject(offset) { |pos, obj| pos + obj.to_s.size }
+
+     # The count is cumulative across revisions.
+     object_count += rev.body.size
+     rev.xreftable = buildxrefs(rev.objects)
+
+     rev.trailer ||= Trailer.new
+     rev.trailer.Size = object_count + 1
+     rev.trailer.startxref = offset
+
+     # The next revision begins after this xref table and trailer.
+     offset += rev.xreftable.to_s.size
+     offset += rev.trailer.to_s.size
+   end
+
+   self
+ end
+
+ #
# This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
# * Allocates objects references.
# * Sets some objects missing required values.
#
def compile(options = {})
@@ -594,15 +742,117 @@
self
end
#
+ # Cleans the document from its references.
+ # Indirect objects are made direct whenever possible.
+ # TODO: Cycle detection to avoid infinite recursion
+ #
+ def logicalize #:nodoc:
+
+ fail "Not yet supported" # always raises; everything below is currently unreachable
+
+ processed = []
+ # Nested def: when this line runs, Ruby defines convert as a method of the enclosing class (it is not a closure).
+ def convert(root) #:nodoc:
+ # Replaces, in place, each Reference held by a Dictionary/Array with its solved target (streams stay references).
+ replaced = [] # references that this call substituted with their targets
+ if root.is_a?(Dictionary) or root.is_a?(Array)
+
+ root.each { |obj|
+ convert(obj) # NOTE(review): the list returned by the recursive call is discarded
+ }
+
+ root.map! { |obj|
+ if obj.is_a?(Reference)
+ target = obj.solve
+ # Streams can't be direct objects
+ if target.is_a?(Stream)
+ obj
+ else
+ replaced << obj
+ target
+ end
+ else
+ obj
+ end
+ }
+
+ end
+
+ replaced
+ end
+
+ @revisions.each do |revision|
+ revision.objects.each do |obj|
+ processed.concat(convert(obj))
+ end
+ end
+ # NOTE(review): processed is filled but never returned or used afterwards.
+ end
+
+ #
+ # Converts a logical PDF view into a physical view ready for writing.
+ #
+ def physicalize
+ # Runs build over every (object, revision) pair from indirect_objects_by_rev, then returns self.
+ #
+ # Indirect objects are added to the revision and assigned numbers.
+ #
+ def build(obj, revision) #:nodoc:
+
+ #
+ # Finalize any subobjects before building the stream.
+ #
+ if obj.is_a?(ObjectStream)
+ obj.each do |subobj|
+ build(subobj, revision)
+ end
+ end
+
+ obj.pre_build
+
+ if obj.is_a?(Dictionary) or obj.is_a?(Array)
+ # Indirect children are swapped for their reference; direct children are kept as they are.
+ obj.map! do |subobj|
+ if subobj.is_indirect?
+ if get_object(subobj.reference) # the document can already resolve it
+ subobj.reference
+ else # not resolvable yet: register it in this revision, then build it
+ ref = add_to_revision(subobj, revision)
+ build(subobj, revision)
+ ref
+ end
+ else
+ subobj
+ end
+ end
+ # Second pass: recurse into every entry left in the container after the substitution.
+ obj.each do |subobj|
+ build(subobj, revision)
+ end
+
+ elsif obj.is_a?(Stream)
+ build(obj.dictionary, revision)
+ end
+
+ obj.post_build
+
+ end
+
+ indirect_objects_by_rev.each do |obj, revision|
+ build(obj, revision)
+ end
+
+ self
+ end
+
+ #
# Returns the final binary representation of the current document.
- # _rebuildxrefs_:: Computes xrefs while writing objects (default true).
- # _obfuscate_:: Do some basic syntactic object obfuscation.
#
- def to_bin(params = {})
+ def output(params = {})
has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}
options =
{
@@ -664,11 +914,11 @@
end
objset = rev.objects
objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
- objset |= objstm.objects
+ objset.concat objstm.objects
end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true
# For each object, in number order
objset.sort.each do |obj|
@@ -773,276 +1023,21 @@
bin
end
#
- # Compute and update XRef::Section for each Revision.
- #
- def rebuildxrefs
-
- size = 0
- startxref = @header.to_s.size
-
- @revisions.each do |revision|
-
- revision.objects.each do |object|
- startxref += object.to_s.size
- end
-
- size += revision.body.size
- revision.xreftable = buildxrefs(revision.objects)
-
- revision.trailer ||= Trailer.new
- revision.trailer.Size = size + 1
- revision.trailer.startxref = startxref
-
- startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
- end
-
- self
- end
-
- #
- # Ends the current Revision, and starts a new one.
- #
- def add_new_revision
-
- root = @revisions.last.trailer[:Root] unless @revisions.empty?
-
- @revisions << Revision.new(self)
- @revisions.last.trailer = Trailer.new
- @revisions.last.trailer.Root = root
-
- self
- end
-
- #
- # Removes a whole document revision.
- # _index_:: Revision index, first is 0.
- #
- def remove_revision(index)
- if index < 0 or index > @revisions.size
- raise IndexError, "Not a valid revision index"
- end
-
- if @revisions.size == 1
- raise InvalidPDFError, "Cannot remove last revision"
- end
-
- @revisions.delete_at(index)
- self
- end
-
- #
- # Looking for an object present at a specified file offset.
- #
- def get_object_by_offset(offset) #:nodoc:
- self.indirect_objects.find { |obj| obj.file_offset == offset }
- end
-
- #
- # Remove an object.
- #
- def delete_object(no, generation = 0)
-
- case no
- when Reference
- target = no
- when ::Integer
- target = Reference.new(no, generation)
- else
- raise TypeError, "Invalid parameter type : #{no.class}"
- end
-
- @revisions.each do |rev|
- rev.body.delete(target)
- end
-
- end
-
- #
- # Search for an indirect object in the document.
- # _no_:: Reference or number of the object.
- # _generation_:: Object generation.
- #
- def get_object(no, generation = 0, use_xrefstm = true) #:nodoc:
- case no
- when Reference
- target = no
- when ::Integer
- target = Reference.new(no, generation)
- when Origami::Object
- return no
- else
- raise TypeError, "Invalid parameter type : #{no.class}"
- end
-
- set = indirect_objects_table
-
- #
- # Search through accessible indirect objects.
- #
- if set.include?(target)
- set[target]
- elsif use_xrefstm == true
- # Look into XRef streams.
-
- if @revisions.last.has_xrefstm?
- xrefstm = @revisions.last.xrefstm
-
- done = []
- while xrefstm.is_a?(XRefStream) and not done.include?(xrefstm)
- xref = xrefstm.find(target.refno)
-
- #
- # We found a matching XRef.
- #
- if xref.is_a?(XRefToCompressedObj)
- objstm = get_object(xref.objstmno, 0, false)
-
- object = objstm.extract_by_index(xref.index)
- if object.is_a?(Origami::Object) and object.no == target.refno
- return object
- else
- return objstm.extract(target.refno)
- end
- elsif xrefstm.has_field?(:Prev)
- done << xrefstm
- xrefstm = get_object_by_offset(xrefstm.Prev)
- else
- break
- end
- end
- end
-
- #
- # Lastly search directly into Object streams (might be very slow).
- #
- stream = set.values.find_all{|obj| obj.is_a?(ObjectStream)}.find do |objstm| objstm.include?(target.refno) end
- stream && stream.extract(target.refno)
- end
-
- end
-
- alias :[] :get_object
-
- #
- # Converts a logical PDF view into a physical view ready for writing.
- #
- def physicalize
-
- #
- # Indirect objects are added to the revision and assigned numbers.
- #
- def build(obj, revision) #:nodoc:
-
- #
- # Finalize any subobjects before building the stream.
- #
- if obj.is_a?(ObjectStream)
- obj.each do |subobj|
- build(subobj, revision)
- end
- end
-
- obj.pre_build
-
- if obj.is_a?(Dictionary) or obj.is_a?(Array)
-
- obj.map! do |subobj|
- if subobj.is_indirect?
- if get_object(subobj.reference)
- subobj.reference
- else
- ref = add_to_revision(subobj, revision)
- build(subobj, revision)
- ref
- end
- else
- subobj
- end
- end
-
- obj.each do |subobj|
- build(subobj, revision)
- end
-
- elsif obj.is_a?(Stream)
- build(obj.dictionary, revision)
- end
-
- obj.post_build
-
- end
-
- indirect_objects_by_rev.each do |obj, revision|
- build(obj, revision)
- end
-
- self
- end
-
- #
- # Cleans the document from its references.
- # Indirects objects are made direct whenever possible.
- # TODO: Circuit-checking to avoid infinite induction
- #
- def logicalize #:nodoc:
-
- fail "Not yet supported"
-
- processed = []
-
- def convert(root) #:nodoc:
-
- replaced = []
- if root.is_a?(Dictionary) or root.is_a?(Array)
-
- root.each { |obj|
- convert(obj)
- }
-
- root.map! { |obj|
- if obj.is_a?(Reference)
- target = obj.solve
- # Streams can't be direct objects
- if target.is_a?(Stream)
- obj
- else
- replaced << obj
- target
- end
- else
- obj
- end
- }
-
- end
-
- replaced
- end
-
- @revisions.each do |revision|
- revision.objects.each do |obj|
- processed.concat(convert(obj))
- end
- end
-
- end
-
- ##########################
- private
- ##########################
-
- #
# Instantiates basic structures required for a valid PDF file.
#
def init
catalog = (self.Catalog = (get_doc_attr(:Root) || Catalog.new))
catalog.Pages = PageTreeNode.new.set_indirect(true)
@revisions.last.trailer.Root = catalog.reference
self
+ end
+
+ def filesize #:nodoc:
+ output(:rebuildxrefs => false).size
end
def version_required #:nodoc:
max = [ 1.0, 0 ]