lib/origami/pdf.rb in origami-2.0.0 vs lib/origami/pdf.rb in origami-2.0.1
- old
+ new
@@ -43,12 +43,10 @@
require 'origami/annotations'
require 'origami/actions'
require 'origami/3d'
require 'origami/signature'
require 'origami/webcapture'
-require 'origami/export'
-require 'origami/webcapture'
require 'origami/encryption'
require 'origami/linearization'
require 'origami/obfuscation'
require 'origami/javascript'
require 'origami/outline'
@@ -145,39 +143,27 @@
pdf = PDF.new
yield(pdf) if block_given?
pdf.save(output, options)
end
alias write create
-
- #
- # Deserializes a PDF dump.
- #
# Removed in 2.0.1. Opened the gzip-compressed file at _filename_ and returned
# the object rebuilt by Marshal.load from its decompressed contents.
# NOTE(review): Marshal.load rebuilds arbitrary Ruby objects from the dump, so
# this was only safe on dumps from a trusted source.
- def deserialize(filename)
- Zlib::GzipReader.open(filename) { |gz|
- return Marshal.load(gz.read)
- }
- end
end
#
# Creates a new PDF instance.
# _parser_:: The Parser object creating the document.
# If none is specified, some default structures are automatically created to get a minimal working document.
#
def initialize(parser = nil)
@header = PDF::Header.new
@revisions = []
# 2.0.1 assigns @parser and @loaded unconditionally, before the first
# revision is added; 2.0.0 only set them when a parser was given.
+ @parser = parser
+ @loaded = false
add_new_revision
@revisions.first.trailer = Trailer.new
- if parser
- @loaded = false
- @parser = parser
- else
- init
- end
# Without a parser, build the default structures (init sets @loaded to true).
+ init if parser.nil?
end
#
# Original file name if parsed from disk, nil otherwise.
#
@@ -198,25 +184,10 @@
def original_data
@parser.target_data if @parser
end
#
- # Serializes the current PDF.
- #
# Removed in 2.0.1. Wrote a gzip-compressed Marshal dump of the document to
# _filename_ and returned self.
# NOTE(review): @parser was restored only on the success path (no ensure), so
# an exception raised while writing left the document without its parser.
- def serialize(filename)
- parser = @parser
- @parser = nil # do not serialize the parser
-
- Zlib::GzipWriter.open(filename) { |gz|
- gz.write Marshal.dump(self)
- }
-
- @parser = parser
- self
- end
-
- #
# Saves the current document.
# _filename_:: The path where to save this PDF.
#
def save(path, params = {})
options =
@@ -275,39 +246,13 @@
pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)
result = []
- search_object = -> (object) do
- case object
- when Stream
- result.concat object.dictionary.strings_cache.select{|str| pattern === str}
- result.concat object.dictionary.names_cache.select{|name| pattern === name.value}
-
- begin
- result.push object if streams and object.data.match(pattern)
- rescue Filter::Error
- next # Skip object if a decoding error occured.
- end
-
- next if object.is_a?(ObjectStream) and not object_streams
-
- object.each do |subobject|
- search_object.call(subobject)
- end
-
- when Name, String
- result.push object if object.value.match(pattern)
-
- when Dictionary, Array then
- result.concat object.strings_cache.select{|str| pattern === str}
- result.concat object.names_cache.select{|name| pattern === name.value}
- end
- end
-
self.indirect_objects.each do |object|
- search_object.call(object)
+ result.concat search_object(object, pattern,
+ streams: streams, object_streams: object_streams)
end
result
end
@@ -327,46 +272,26 @@
#
# Iterates over the objects of the document.
# _compressed_: iterates over the objects inside object streams.
# _recursive_: iterates recursively inside objects like arrays and dictionaries.
#
def each_object(compressed: false, recursive: false, &block)
  # Without a block, hand back an Enumerator bound to the same options.
  return enum_for(__method__, compressed: compressed,
                              recursive: recursive
                 ) unless block_given?

  @revisions.each do |revision|
    revision.each_object do |object|
      block.call(object)

      walk_object(object, &block) if recursive

      # Objects stored inside an object stream are only visited on request.
      if object.is_a?(ObjectStream) and compressed
        object.each do |child_obj|
          block.call(child_obj)

          # The block must be forwarded here as well: without it walk_object
          # only returns an Enumerator and the children of compressed objects
          # are never yielded.
          walk_object(child_obj, &block) if recursive
        end
      end
    end
  end
end
@@ -537,11 +462,11 @@
next if xref.nil?
#
# We found a matching XRef.
#
- if xref.is_a?(XRefToCompressedObj)
+ if xref.is_a?(XRefToCompressedObject)
objstm = get_object(xref.objstmno, 0, use_xrefstm: use_xrefstm)
object = objstm.extract_by_index(xref.index)
if object.is_a?(Origami::Object) and object.no == target.refno
return object
@@ -568,39 +493,39 @@
#
# Casts a PDF object into another object type.
# The target type must be a subtype of the original type.
#
# 2.0.1 drops the _parser_ argument; the document's own @parser is passed to
# cast_to instead.
- def cast_object(reference, type, parser = nil) #:nodoc:
+ def cast_object(reference, type) #:nodoc:
@revisions.each do |rev|
- if rev.body.include?(reference) and type < rev.body[reference].class
- rev.body[reference] = rev.body[reference].cast_to(type, parser)
+ if rev.body.include?(reference)
+ object = rev.body[reference]
# Already an instance of the requested type: nothing to cast.
+ return object if object.is_a?(type)
- rev.body[reference]
- else
- nil
# Cast only when _type_ is a strict subclass of the stored object's class,
# and store the cast object back into the revision body.
+ if type < rev.body[reference].class
+ rev.body[reference] = object.cast_to(type, @parser)
+
+ return rev.body[reference]
+ end
end
end
+
# No revision produced a matching or castable object.
+ nil
end
#
# Returns a new number/generation for future object.
#
def allocate_new_object_number
# Returns a [ number, generation ] pair; the generation is always 0.
- no = 1
- # Deprecated number allocation policy (first available)
- #no = no + 1 while get_object(no)
-
- objset = self.indirect_objects
- self.indirect_objects.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
- objstm.each{|obj| objset << obj}
# 2.0.1: take the object with the highest number, objects inside object
# streams included (compressed: true). max_by returns nil when there is no
# object at all, in which case numbering starts at 1.
# NOTE(review): 2.0.0 dropped nil numbers with compact before taking the
# maximum; this version assumes every object.no is comparable -- confirm.
+ last_object = self.each_object(compressed: true).max_by {|object| object.no }
+ if last_object.nil?
+ no = 1
+ else
+ no = last_object.no + 1
end
- allocated = objset.collect{|obj| obj.no}.compact
- no = allocated.max + 1 unless allocated.empty?
-
[ no, 0 ]
end
#
# Mark the document as complete.
@@ -613,10 +538,74 @@
##########################
private
##########################
#
+ # Iterates over the children of an object, avoiding cycles.
+ #
def walk_object(object, excludes: [], &block)
  # _object_::   The object whose descendants are yielded: the values of a
  #              Dictionary, the items of an Array, the dictionary of a Stream.
  # _excludes_:: Objects already visited. The same array is shared by the
  #              whole recursion and is mutated.
  # Returns an Enumerator when no block is given.
  return enum_for(__method__, object, excludes: excludes) unless block_given?

  # Cycle guard: an object is expanded at most once per walk.
  return if excludes.include?(object)
  excludes.push(object)

  # The block has to be forwarded explicitly on every recursive call.
  # Otherwise block_given? is false one level down, the call merely returns
  # an Enumerator, and nothing below the direct children is ever yielded.
  case object
  when Dictionary
    object.each_value do |value|
      yield(value)
      walk_object(value, excludes: excludes, &block)
    end

  when Array
    object.each do |child|
      yield(child)
      walk_object(child, excludes: excludes, &block)
    end

  when Stream
    yield(object.dictionary)
    walk_object(object.dictionary, excludes: excludes, &block)
  end
end
+
+ #
+ # Searches through an object, possibly going into object streams.
+ # Returns an array of matching strings, names and streams.
+ #
# _pattern_ is matched with === against cached strings and against the value
# of cached names, and with match against stream data and Name/String values.
+ def search_object(object, pattern, streams: true, object_streams: true)
+ result = []
+
+ case object
+ when Stream
# Strings and names are taken from the caches of the stream dictionary.
+ result.concat object.dictionary.strings_cache.select{|str| pattern === str}
+ result.concat object.dictionary.names_cache.select{|name| pattern === name.value}
+
# The stream itself is reported only when _streams_ is set and its data matches.
+ begin
+ result.push object if streams and object.data.match(pattern)
+ rescue Filter::Error
+ return result # Skip object if a decoding error occurred.
+ end
+
# Only object streams are descended into, and only when _object_streams_ is set.
+ return result unless object.is_a?(ObjectStream) and object_streams
+
+ object.each do |child|
+ result.concat search_object(child, pattern,
+ streams: streams, object_streams: object_streams)
+ end
+
+ when Name, String
+ result.push object if object.value.match(pattern)
+
+ when Dictionary, Array
+ result.concat object.strings_cache.select{|str| pattern === str}
+ result.concat object.names_cache.select{|name| pattern === name.value}
+ end
+
+ result
+ end
+
+ #
# Load an object from its given file offset.
# The document must have an associated Parser.
#
def load_object_at_offset(revision, offset)
return nil if @loaded or @parser.nil?
@@ -625,23 +614,11 @@
begin
object = @parser.parse_object(offset)
return nil if object.nil?
if self.is_a?(Encryption::EncryptedDocument)
- case object
- when String
- object.extend(Encryption::EncryptedString)
- object.decrypted = false
- when Stream
- object.extend(Encryption::EncryptedStream)
- object.decrypted = false
- when Dictionary, Array
- object.strings_cache.each do |string|
- string.extend(Encryption::EncryptedString)
- string.decrypted = false
- end
- end
+ make_encrypted_object(object)
end
add_to_revision(object, revision)
ensure
@parser.pos = pos
@@ -649,10 +626,26 @@
object
end
#
+ # Method called on encrypted objects loaded into the document.
+ #
# Extends the object (or, for a Dictionary/Array, each string in its
# strings_cache) with the matching Encryption mixin.
# Unlike the inline 2.0.0 code it replaces in load_object_at_offset, this
# method does not assign decrypted = false on the extended objects.
+ def make_encrypted_object(object)
+ case object
+ when String
+ object.extend(Encryption::EncryptedString)
+ when Stream
+ object.extend(Encryption::EncryptedStream)
+ when Dictionary, Array
+ object.strings_cache.each do |string|
+ string.extend(Encryption::EncryptedString)
+ end
+ end
+ end
+
+ #
# Force the loading of all objects in the document.
#
def load_all_objects
return if @loaded or @parser.nil?
@@ -663,12 +656,12 @@
xrefs = revision.xrefstm
else
next
end
- xrefs.each_with_number do |_, no|
- self.get_object(no)
+ xrefs.each_with_number do |xref, no|
+ self.get_object(no) unless xref.free?
end
end
@loaded = true
end
@@ -714,15 +707,11 @@
#
# Allocates object numbers and creates references.
# Invokes object finalization methods.
#
- if self.is_a?(Encryption::EncryptedDocument)
- physicalize(options)
- else
- physicalize
- end
+ physicalize(options)
#
# Sets the PDF version header.
#
version, level = version_required
@@ -733,104 +722,62 @@
self
end
#
- # Cleans the document from its references.
- # Indirects objects are made direct whenever possible.
- # TODO: Circuit-checking to avoid infinite induction
+ # Converts a logical PDF view into a physical view ready for writing.
#
# 2.0.1 removes logicalize (its body started with raise NotImplementedError,
# so the conversion code after it never ran) and defines physicalize here.
# physicalize now accepts the save options, forwards them to build_object for
# every (object, revision) pair from indirect_objects_by_rev, and returns self.
- def logicalize #:nodoc:
- raise NotImplementedError
+ def physicalize(options = {})
- processed = []
-
- convert = -> (root) do
- replaced = []
- if root.is_a?(Dictionary) or root.is_a?(Array)
- root.each do |obj|
- convert[obj]
- end
-
- root.map! do |obj|
- if obj.is_a?(Reference)
- target = obj.solve
- # Streams can't be direct objects
- if target.is_a?(Stream)
- obj
- else
- replaced << obj
- target
- end
- else
- obj
- end
- end
- end
-
- replaced
+ indirect_objects_by_rev.each do |obj, revision|
+ build_object(obj, revision, options)
end
- @revisions.each do |revision|
- revision.objects.each do |obj|
- processed.concat(convert[obj])
- end
- end
+ self
end
- #
- # Converts a logical PDF view into a physical view ready for writing.
- #
- def physicalize
-
- #
- # Indirect objects are added to the revision and assigned numbers.
- #
- build = -> (obj, revision) do
- #
- # Finalize any subobjects before building the stream.
- #
- if obj.is_a?(ObjectStream)
- obj.each do |subobj|
- build.call(subobj, revision)
- end
# Finalizes one object for writing. Replaces the recursive `build` lambda of
# 2.0.0. Order: objects held by an object stream first, then pre_build, then
# the stream dictionary or the compound object's children, then post_build.
# _options_ is not read here; it is only passed down to the nested calls.
+ def build_object(object, revision, options)
+ # Build any compressed object before building the object stream.
+ if object.is_a?(ObjectStream)
+ object.each do |compressed_obj|
+ build_object(compressed_obj, revision, options)
end
+ end
- obj.pre_build
+ object.pre_build
- if obj.is_a?(Dictionary) or obj.is_a?(Array)
# Stream is tested before Dictionary/Array, so a stream is handled through
# its dictionary.
+ case object
+ when Stream
+ build_object(object.dictionary, revision, options)
+ when Dictionary, Array
+ build_compound_object(object, revision, options)
+ end
- obj.map! do |subobj|
- if subobj.indirect?
- if get_object(subobj.reference)
- subobj.reference
- else
- ref = add_to_revision(subobj, revision)
- build.call(subobj, revision)
- ref
- end
- else
- subobj
- end
- end
+ object.post_build
+ end
- obj.each do |subobj|
- build.call(subobj, revision)
- end
# Builds the children of a Dictionary or Array in two passes: indirect
# children are first replaced in place by references, then every child is
# finalized with build_object. Does nothing for any other kind of object.
+ def build_compound_object(object, revision, options)
+ return unless object.is_a?(Dictionary) or object.is_a?(Array)
- elsif obj.is_a?(Stream)
- build.call(obj.dictionary, revision)
- end
+ # Flatten the object by adding indirect objects to the revision and
+ # replacing them with their reference.
+ object.map! do |child|
+ next(child) unless child.indirect?
- obj.post_build
# An indirect child the document can already resolve is only referenced;
# otherwise it is added to the revision and built before being referenced.
+ if get_object(child.reference)
+ child.reference
+ else
+ reference = add_to_revision(child, revision)
+ build_object(child, revision, options)
+ reference
+ end
end
- indirect_objects_by_rev.each do |obj, revision|
- build.call(obj, revision)
- end
-
- self
+ # Finalize all the children objects.
+ object.each do |child|
+ build_object(child, revision, options)
+ end
end
#
# Returns the final binary representation of the current document.
#
@@ -856,13 +803,11 @@
options[:use_xrefstm] = has_objstm
options[:use_xreftable] = (not has_objstm)
end
# Get trailer dictionary
- trailer_info = get_trailer_info
- raise InvalidPDFError, "No trailer information found" if trailer_info.nil?
- trailer_dict = trailer_info.dictionary
+ trailer_dict = self.trailer.dictionary
prev_xref_offset = nil
xrefstm_offset = nil
# Header
@@ -937,11 +882,11 @@
# Process embedded objects
if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
index = obj.parent.index(obj.no)
- xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
+ xrefs_stm << XRefToCompressedObject.new(obj.parent.no, index)
lastno_stm = obj.no
else
xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)
@@ -1020,10 +965,9 @@
#
# Instantiates basic structures required for a valid PDF file.
#
def init
catalog = (self.Catalog = (trailer_key(:Root) || Catalog.new))
- catalog.Pages = PageTreeNode.new.set_indirect(true)
@revisions.last.trailer.Root = catalog.reference
@loaded = true
self