lib/origami/pdf.rb in origami-1.2.7 vs lib/origami/pdf.rb in origami-2.0.0
- old
+ new
@@ -1,27 +1,22 @@
=begin
-= File
- pdf.rb
+ This file is part of Origami, PDF manipulation framework for Ruby
+ Copyright (C) 2016 Guillaume Delugré.
-= Info
- This file is part of Origami, PDF manipulation framework for Ruby
- Copyright (C) 2010 Guillaume Delugré <guillaume AT security-labs DOT org>
- All right reserved.
-
- Origami is free software: you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
+ Origami is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- Origami is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Lesser General Public License for more details.
+ Origami is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with Origami. If not, see <http://www.gnu.org/licenses/>.
+ You should have received a copy of the GNU Lesser General Public License
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
=end
require 'origami/object'
require 'origami/null'
@@ -31,1108 +26,1138 @@
require 'origami/boolean'
require 'origami/numeric'
require 'origami/string'
require 'origami/array'
require 'origami/stream'
+require 'origami/tree'
require 'origami/filters'
-require 'origami/trailer'
-require 'origami/xreftable'
require 'origami/header'
+require 'origami/metadata'
require 'origami/functions'
-require 'origami/catalog'
-require 'origami/font'
require 'origami/page'
+require 'origami/font'
require 'origami/graphics'
require 'origami/destinations'
-require 'origami/outline'
-require 'origami/actions'
-require 'origami/file'
+require 'origami/filespec'
+require 'origami/xfa'
require 'origami/acroform'
require 'origami/annotations'
+require 'origami/actions'
require 'origami/3d'
require 'origami/signature'
require 'origami/webcapture'
-require 'origami/metadata'
require 'origami/export'
require 'origami/webcapture'
require 'origami/encryption'
require 'origami/linearization'
require 'origami/obfuscation'
-require 'origami/xfa'
require 'origami/javascript'
+require 'origami/outline'
require 'origami/outputintents'
+require 'origami/collections'
+require 'origami/catalog'
+require 'origami/xreftable'
+require 'origami/trailer'
-require 'origami/parsers/pdf'
+require 'origami/parsers/pdf/linear'
+require 'origami/parsers/pdf/lazy'
module Origami
- class InvalidPDFError < Exception #:nodoc:
- end
-
- #
- # Main class representing a PDF file and its inner contents.
- # A PDF file contains a set of Revision.
- #
- class PDF
-
+ class InvalidPDFError < Error #:nodoc:
+ end
+
#
- # Class representing a particular revision in a PDF file.
- # Revision contains :
- # * A Body, which is a sequence of Object.
- # * A XRef::Section, holding XRef information about objects in body.
- # * A Trailer.
+ # Main class representing a PDF file and its inner contents.
+ # A PDF file contains a set of Revision.
#
- class Revision
- attr_accessor :pdf
- attr_accessor :body, :xreftable, :xrefstm, :trailer
-
- def initialize(pdf)
- @pdf = pdf
- @body = {}
- @xreftable = nil
- @xrefstm = nil
- @trailer = nil
- end
+ class PDF
- def trailer=(trl)
- trl.pdf = @pdf
- @trailer = trl
- end
+ #
+ # Class representing a particular revision in a PDF file.
+ # Revision contains :
+ # * A Body, which is a sequence of Object.
+ # * A XRef::Section, holding XRef information about objects in body.
+ # * A Trailer.
+ #
+ class Revision
+ attr_accessor :pdf
+ attr_accessor :body, :xreftable, :xrefstm
+ attr_reader :trailer
- def has_xreftable?
- not @xreftable.nil?
- end
+ def initialize(doc)
+ @document = doc
+ @body = {}
+ @xreftable = nil
+ @xrefstm = nil
+ @trailer = nil
+ end
- def has_xrefstm?
- not @xrefstm.nil?
- end
+ def trailer=(trl)
+ trl.document = @document
- def objects
- @body.values
- end
- end
+ @trailer = trl
+ end
- attr_accessor :header, :revisions
-
- class << self
-
- #
- # Reads and parses a PDF file from disk.
- #
- def read(filename, options = {})
- filename = File.expand_path(filename) if filename.is_a?(::String)
- PDF::LinearParser.new(options).parse(filename)
- end
+ def has_xreftable?
+ not @xreftable.nil?
+ end
- #
- # Creates a new PDF and saves it.
- # If a block is passed, the PDF instance can be processed before saving.
- #
- def create(output, options = {})
- pdf = PDF.new
- yield(pdf) if block_given?
- pdf.save(output, options)
- end
- alias write create
-
- #
- # Deserializes a PDF dump.
- #
- def deserialize(filename)
- Zlib::GzipReader.open(filename) { |gz|
- pdf = Marshal.load(gz.read)
- }
-
- pdf
- end
- end
-
- #
- # Creates a new PDF instance.
- # _parser_:: The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.
- #
- def initialize(parser = nil)
- @header = PDF::Header.new
- @revisions = []
-
- add_new_revision
- @revisions.first.trailer = Trailer.new
+ def has_xrefstm?
+ not @xrefstm.nil?
+ end
- if parser
- @parser = parser
- else
- init
- end
- end
-
- #
- # Original file name if parsed from disk, nil otherwise.
- #
- def original_filename
- @parser.target_filename if @parser
- end
+ def each_object(&b)
+ @body.each_value(&b)
+ end
- #
- # Original file size if parsed from a data stream, nil otherwise.
- #
- def original_filesize
- @parser.target_filesize if @parser
- end
+ def objects
+ @body.values
+ end
+ end
- #
- # Original data parsed to create this document, nil if created from scratch.
- #
- def original_data
- @parser.target_data if @parser
- end
-
- #
- # Serializes the current PDF.
- #
- def serialize(filename)
- parser = @parser
- @parser = nil # do not serialize the parser
+ #
+ # Document header and revisions.
+ #
+ attr_accessor :header, :revisions
- Zlib::GzipWriter.open(filename) { |gz|
- gz.write Marshal.dump(self)
- }
-
- @parser = parser
- self
- end
-
- #
- # Saves the current document.
- # _filename_:: The path where to save this PDF.
- #
- def save(path, params = {})
-
- options =
- {
- :delinearize => true,
- :recompile => true,
- :decrypt => false
- }
- options.update(params)
+ class << self
+ #
+ # Reads and parses a PDF file from disk.
+ #
+ def read(path, options = {})
+ path = File.expand_path(path) if path.is_a?(::String)
+ lazy = options[:lazy]
- if self.frozen? # incompatible flags with frozen doc (signed)
- options[:recompile] =
- options[:rebuildxrefs] =
- options[:noindent] =
- options[:obfuscate] = false
- end
-
- if path.respond_to?(:write)
- fd = path
- else
- path = File.expand_path(path)
- fd = File.open(path, 'w').binmode
- end
-
- intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
- self.delinearize! if options[:delinearize] and self.is_linearized?
- compile(options) if options[:recompile]
+ if lazy
+ parser_class = PDF::LazyParser
+ else
+ parser_class = PDF::LinearParser
+ end
- fd.write output(options)
- fd.close
-
- self
- end
- alias write save
-
- #
- # Saves the file up to given revision number.
- # This can be useful to visualize the modifications over different incremental updates.
- # _revision_:: The revision number to save.
- # _filename_:: The path where to save this PDF.
- #
- def save_upto(revision, filename)
- save(filename, :up_to_revision => revision)
- end
+ parser_class.new(options).parse(path)
+ end
- #
- # Returns an array of Objects whose content is matching _pattern_.
- #
-# def grep(*patterns)
-#
-# patterns.map! do |pattern|
-# pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
-# end
-#
-# unless patterns.all? { |pattern| pattern.is_a?(Regexp) }
-# raise TypeError, "Expected a String or Regexp"
-# end
-#
-# result = []
-# objects.each do |obj|
-# begin
-# case obj
-# when String, Name
-# result << obj if patterns.any?{|pattern| obj.value.to_s.match(pattern)}
-# when Stream
-# result << obj if patterns.any?{|pattern| obj.data.match(pattern)}
-# end
-# rescue Exception => e
-# puts "[#{e.class}] #{e.message}"
-#
-# next
-# end
-# end
-#
-# result
-# end
+ #
+ # Creates a new PDF and saves it.
+ # If a block is passed, the PDF instance can be processed before saving.
+ #
+ def create(output, options = {})
+ pdf = PDF.new
+ yield(pdf) if block_given?
+ pdf.save(output, options)
+ end
+ alias write create
- #
- # Returns an array of strings and streams matching the given pattern.
- #
- def grep(*patterns) #:nodoc:
- patterns.map! do |pattern|
- if pattern.is_a?(::String)
- Regexp.new(Regexp.escape(pattern), Regexp::IGNORECASE)
- else
- pattern
+ #
+ # Deserializes a PDF dump.
+ #
+ def deserialize(filename)
+ Zlib::GzipReader.open(filename) { |gz|
+ return Marshal.load(gz.read)
+ }
+ end
end
- end
- unless patterns.all? { |pattern| pattern.is_a?(Regexp) }
- raise TypeError, "Expected a String or Regexp"
- end
+ #
+ # Creates a new PDF instance.
+ # _parser_:: The Parser object creating the document.
+ # If none is specified, some default structures are automatically created to get a minimal working document.
+ #
+ def initialize(parser = nil)
+ @header = PDF::Header.new
+ @revisions = []
- objset = []
- self.indirect_objects.each do |indobj|
- case indobj
- when Stream then
- objset.push indobj
- objset.concat(indobj.dictionary.strings_cache)
- objset.concat(indobj.dictionary.names_cache)
- when Name,String then objset.push indobj
- when Dictionary,Array then
- objset.concat(indobj.strings_cache)
- objset.concat(indobj.names_cache)
+ add_new_revision
+ @revisions.first.trailer = Trailer.new
+
+ if parser
+ @loaded = false
+ @parser = parser
+ else
+ init
+ end
end
- end
- objset.delete_if do |obj|
- begin
- case obj
- when String, Name
- not patterns.any?{|pattern| obj.value.to_s.match(pattern)}
- when Stream
- not patterns.any?{|pattern| obj.data.match(pattern)}
- end
- rescue Exception => e
- true
+ #
+ # Original file name if parsed from disk, nil otherwise.
+ #
+ def original_filename
+ @parser.target_filename if @parser
end
- end
- end
- #
- # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
- #
- def ls(*patterns)
- return objects(:include_keys => false) if patterns.empty?
+ #
+ # Original file size if parsed from a data stream, nil otherwise.
+ #
+ def original_filesize
+ @parser.target_filesize if @parser
+ end
- result = []
+ #
+ # Original data parsed to create this document, nil if created from scratch.
+ #
+ def original_data
+ @parser.target_data if @parser
+ end
- patterns.map! do |pattern|
- pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
- end
+ #
+ # Serializes the current PDF.
+ #
+ def serialize(filename)
+ parser = @parser
+ @parser = nil # do not serialize the parser
- objects(:only_keys => true).each do |key|
- if patterns.any?{ |pattern| key.value.to_s.match(pattern) }
- value = key.parent[key]
- result << ( value.is_a?(Reference) ? value.solve : value )
+ Zlib::GzipWriter.open(filename) { |gz|
+ gz.write Marshal.dump(self)
+ }
+
+ @parser = parser
+ self
end
- end
- result
- end
+ #
+ # Saves the current document.
+ # _filename_:: The path where to save this PDF.
+ #
+ def save(path, params = {})
+ options =
+ {
+ delinearize: true,
+ recompile: true,
+ decrypt: false
+ }
+ options.update(params)
- #
- # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
- # Do not follow references.
- #
- def ls_no_follow(*patterns)
- return objects(:include_keys => false) if patterns.empty?
+ if self.frozen? # incompatible flags with frozen doc (signed)
+ options[:recompile] =
+ options[:rebuild_xrefs] =
+ options[:noindent] =
+ options[:obfuscate] = false
+ end
- result = []
+ if path.respond_to?(:write)
+ fd = path
+ else
+ path = File.expand_path(path)
+ fd = File.open(path, 'w').binmode
+ close = true
+ end
- patterns.map! do |pattern|
- pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
- end
+ load_all_objects unless @loaded
- objects(:only_keys => true).each do |key|
- if patterns.any?{ |pattern| key.value.to_s.match(pattern) }
- value = key.parent[key]
- result << value
+ intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
+ self.delinearize! if options[:delinearize] and self.linearized?
+ compile(options) if options[:recompile]
+
+ fd.write output(options)
+ fd.close if close
+
+ self
end
- end
+ alias write save
- result
- end
+ #
+ # Saves the file up to given revision number.
+ # This can be useful to visualize the modifications over different incremental updates.
+ # _revision_:: The revision number to save.
+ # _filename_:: The path where to save this PDF.
+ #
+ def save_upto(revision, filename)
+ save(filename, up_to_revision: revision)
+ end
- #
- # Returns an array of objects matching specified block.
- #
- def find(params = {}, &b)
-
- options =
- {
- :only_indirect => false
- }
- options.update(params)
-
- objset = (options[:only_indirect] == true) ?
- self.indirect_objects : self.objects
+ #
+ # Returns an array of strings, names and streams matching the given pattern.
+ # _streams_: Search into decoded stream data.
+ # _object_streams_: Search into objects inside object streams.
+ #
+ def grep(pattern, streams: true, object_streams: true) #:nodoc:
- objset.find_all(&b)
- end
-
- #
- # Returns an array of objects embedded in the PDF body.
- # _include_objstm_:: Whether it shall return objects embedded in object streams.
- # Note : Shall return to an iterator for Ruby 1.9 comp.
- #
- def objects(params = {})
-
- def append_subobj(root, objset, opts)
-
- if objset.find{ |o| root.equal?(o) }.nil?
- objset << root unless opts[:only_keys]
+ pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
+ raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)
- if root.is_a?(Dictionary)
- root.each_pair { |name, value|
- objset << name if opts[:only_keys]
+ result = []
- append_subobj(name, objset, opts) if opts[:include_keys] and not opts[:only_keys]
- append_subobj(value, objset, opts)
- }
- elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and opts[:include_objectstreams])
- root.each { |subobj| append_subobj(subobj, objset, opts) }
- end
- end
- end
+ search_object = -> (object) do
+ case object
+ when Stream
+ result.concat object.dictionary.strings_cache.select{|str| pattern === str}
+ result.concat object.dictionary.names_cache.select{|name| pattern === name.value}
- options =
- {
- :include_objectstreams => true,
- :include_keys => true,
- :only_keys => false
- }
- options.update(params)
+ begin
+ result.push object if streams and object.data.match(pattern)
+ rescue Filter::Error
+ next # Skip object if a decoding error occured.
+ end
- options[:include_keys] |= options[:only_keys]
-
- objset = []
- @revisions.each do |revision|
- revision.objects.each do |object|
- append_subobj(object, objset, options)
+ next if object.is_a?(ObjectStream) and not object_streams
+
+ object.each do |subobject|
+ search_object.call(subobject)
+ end
+
+ when Name, String
+ result.push object if object.value.match(pattern)
+
+ when Dictionary, Array then
+ result.concat object.strings_cache.select{|str| pattern === str}
+ result.concat object.names_cache.select{|name| pattern === name.value}
+ end
+ end
+
+ self.indirect_objects.each do |object|
+ search_object.call(object)
+ end
+
+ result
end
- end
-
- objset
- end
-
- #
- # Return an array of indirect objects.
- #
- def indirect_objects
- @revisions.inject([]) do |set, rev| set.concat(rev.objects) end
- end
- alias :root_objects :indirect_objects
-
- #
- # Adds a new object to the PDF file.
- # If this object has no version number, then a new one will be automatically computed and assignated to him.
- # It returns a Reference to this Object.
- # _object_:: The object to add.
- #
- def <<(object)
- owner = object.pdf
- #
- # Does object belongs to another PDF ?
- #
- if owner and not owner.equal?(self)
- import object
- else
- add_to_revision(object, @revisions.last)
- end
- end
- alias :insert :<<
-
- #
- # Similar to PDF#insert or PDF#<<, but for an object belonging to another document.
- # Object will be recursively copied and new version numbers will be assigned.
- # Returns the new reference to the imported object.
- # _object_:: The object to import.
- #
- def import(object)
- self.insert(object.export)
- end
+ #
+ # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
+ #
+ def ls(pattern, follow_references: true)
- #
- # Adds a new object to a specific revision.
- # If this object has no version number, then a new one will be automatically computed and assignated to him.
- # It returns a Reference to this Object.
- # _object_:: The object to add.
- # _revision_:: The revision to add the object to.
- #
- def add_to_revision(object, revision)
-
- object.set_indirect(true)
- object.set_pdf(self)
-
- object.no, object.generation = alloc_new_object_number if object.no == 0
-
- revision.body[object.reference] = object
-
- object.reference
- end
+ pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
+ raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)
- #
- # Ends the current Revision, and starts a new one.
- #
- def add_new_revision
-
- root = @revisions.last.trailer[:Root] unless @revisions.empty?
+ self.grep(pattern, streams: false, object_streams: true)
+ .select {|object| object.is_a?(Name) and object.parent.is_a?(Dictionary) and object.parent.key?(object) }
+ .collect {|object| result = object.parent[object]; follow_references ? result.solve : result }
+ end
- @revisions << Revision.new(self)
- @revisions.last.trailer = Trailer.new
- @revisions.last.trailer.Root = root
+ #
+ # Iterates over the objects of the document.
+ # _compressed_: iterates over the objects inside object streams.
+ # _recursive_: iterates recursively inside objects like arrays and dictionaries.
+ #
+ def each_object(compressed: false, recursive: false)
+ return enum_for(__method__, compressed: compressed,
+ recursive: recursive
+ ) unless block_given?
- self
- end
+ walk_object = -> (object) do
+ case object
+ when Dictionary
+ object.each_value do |value|
+ yield(value)
+ walk_object.call(value)
+ end
- #
- # Removes a whole document revision.
- # _index_:: Revision index, first is 0.
- #
- def remove_revision(index)
- if index < 0 or index > @revisions.size
- raise IndexError, "Not a valid revision index"
- end
+ when Array
+ object.each do |child|
+ yield(child)
+ walk_object.call(child)
+ end
- if @revisions.size == 1
- raise InvalidPDFError, "Cannot remove last revision"
- end
+ when Stream
+ yield(object.dictionary)
+ walk_object.call(object.dictionary)
+ end
+ end
- @revisions.delete_at(index)
- self
- end
-
- #
- # Looking for an object present at a specified file offset.
- #
- def get_object_by_offset(offset) #:nodoc:
- self.indirect_objects.find { |obj| obj.file_offset == offset }
- end
+ @revisions.each do |revision|
+ revision.each_object do |object|
+ yield(object)
- #
- # Remove an object.
- #
- def delete_object(no, generation = 0)
-
- case no
- when Reference
- target = no
- when ::Integer
- target = Reference.new(no, generation)
- else
- raise TypeError, "Invalid parameter type : #{no.class}"
- end
-
- @revisions.each do |rev|
- rev.body.delete(target)
- end
+ walk_object.call(object) if recursive
- end
+ if object.is_a?(ObjectStream) and compressed
+ object.each do |child_obj|
+ yield(child_obj)
- #
- # Search for an indirect object in the document.
- # _no_:: Reference or number of the object.
- # _generation_:: Object generation.
- #
- def get_object(no, generation = 0, use_xrefstm = true) #:nodoc:
- case no
- when Reference
- target = no
- when ::Integer
- target = Reference.new(no, generation)
- when Origami::Object
- return no
- else
- raise TypeError, "Invalid parameter type : #{no.class}"
- end
-
- #
- # Search through accessible indirect objects.
- #
- @revisions.each do |rev|
- return rev.body[target] if rev.body.include?(target)
- end
+ walk_object.call(child_obj) if recursive
+ end
+ end
+ end
+ end
+ end
- # Look into XRef streams.
- if use_xrefstm == true
- if @revisions.last.has_xrefstm?
- xrefstm = @revisions.last.xrefstm
+ #
+ # Return an array of indirect objects.
+ #
+ def indirect_objects
+ @revisions.inject([]) do |set, rev| set.concat(rev.objects) end
+ end
+ alias root_objects indirect_objects
- done = []
- while xrefstm.is_a?(XRefStream) and not done.include?(xrefstm)
- xref = xrefstm.find(target.refno)
-
+ #
+ # Adds a new object to the PDF file.
+ # If this object has no version number, then a new one will be automatically
+ # computed and assignated to him.
+ #
+ # It returns a Reference to this Object.
+ # _object_:: The object to add.
+ #
+ def <<(object)
+ owner = object.document
+
#
- # We found a matching XRef.
+ # Does object belongs to another PDF ?
#
- if xref.is_a?(XRefToCompressedObj)
- objstm = get_object(xref.objstmno, 0, false)
-
- object = objstm.extract_by_index(xref.index)
- if object.is_a?(Origami::Object) and object.no == target.refno
- return object
- else
- return objstm.extract(target.refno)
- end
- elsif xrefstm.has_field?(:Prev)
- done << xrefstm
- xrefstm = get_object_by_offset(xrefstm.Prev)
+ if owner and not owner.equal?(self)
+ import object
else
- break
+ add_to_revision(object, @revisions.last)
end
- end
end
+ alias insert <<
#
- # Lastly search directly into Object streams (might be very slow).
+ # Similar to PDF#insert or PDF#<<, but for an object belonging to another document.
+ # Object will be recursively copied and new version numbers will be assigned.
+ # Returns the new reference to the imported object.
+ # _object_:: The object to import.
#
- @revisions.each do |rev|
- streams = rev.objects.find_all{|obj| obj.is_a?(ObjectStream) and obj.include?(target.refno)}
- return streams.first.extract(target.refno) unless streams.empty?
+ def import(object)
+ self.insert(object.export)
end
- nil
- end
-
- end
+ #
+ # Adds a new object to a specific revision.
+ # If this object has no version number, then a new one will be automatically
+ # computed and assignated to him.
+ #
+ # It returns a Reference to this Object.
+ # _object_:: The object to add.
+ # _revision_:: The revision to add the object to.
+ #
+ def add_to_revision(object, revision)
+ object.set_indirect(true)
+ object.set_document(self)
- alias :[] :get_object
+ object.no, object.generation = allocate_new_object_number if object.no == 0
- def cast_object(reference, type) #:nodoc:
- @revisions.each do |rev|
- if rev.body.include?(reference) and type < rev.body[reference].class
- rev.body[reference] = rev.body[reference].cast_to(type)
+ revision.body[object.reference] = object
+
+ object.reference
end
- end
- end
-
- #
- # Returns a new number/generation for future object.
- #
- def alloc_new_object_number
- no = 1
- # Deprecated number allocation policy (first available)
- #no = no + 1 while get_object(no)
+ #
+ # Ends the current Revision, and starts a new one.
+ #
+ def add_new_revision
+ root = @revisions.last.trailer[:Root] unless @revisions.empty?
- objset = self.indirect_objects
- self.indirect_objects.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
- objstm.each{|obj| objset << obj}
- end
+ @revisions << Revision.new(self)
+ @revisions.last.trailer = Trailer.new
+ @revisions.last.trailer.Root = root
- allocated = objset.collect{|obj| obj.no}.compact
- no = allocated.max + 1 unless allocated.empty?
-
- [ no, 0 ]
- end
-
- ##########################
- private
- ##########################
-
- #
- # Compute and update XRef::Section for each Revision.
- #
- def rebuildxrefs
-
- size = 0
- startxref = @header.to_s.size
-
- @revisions.each do |revision|
-
- revision.objects.each do |object|
- startxref += object.to_s.size
+ self
end
-
- size += revision.body.size
- revision.xreftable = buildxrefs(revision.objects)
-
- revision.trailer ||= Trailer.new
- revision.trailer.Size = size + 1
- revision.trailer.startxref = startxref
-
- startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
- end
-
- self
- end
-
- #
- # This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
- # * Allocates objects references.
- # * Sets some objects missing required values.
- #
- def compile(options = {})
- #
- # A valid document must have at least one page.
- #
- append_page if pages.empty?
-
- #
- # Allocates object numbers and creates references.
- # Invokes object finalization methods.
- #
- if self.is_a?(Encryption::EncryptedDocument)
- physicalize(options)
- else
- physicalize
- end
-
- #
- # Sets the PDF version header.
- #
- version, level = version_required
- @header.majorversion = version[0,1].to_i
- @header.minorversion = version[2,1].to_i
+ #
+ # Removes a whole document revision.
+ # _index_:: Revision index, first is 0.
+ #
+ def remove_revision(index)
+ if index < 0 or index > @revisions.size
+ raise IndexError, "Not a valid revision index"
+ end
- set_extension_level(version, level) if level > 0
-
- self
- end
-
- #
- # Cleans the document from its references.
- # Indirects objects are made direct whenever possible.
- # TODO: Circuit-checking to avoid infinite induction
- #
- def logicalize #:nodoc:
+ if @revisions.size == 1
+ raise InvalidPDFError, "Cannot remove last revision"
+ end
- fail "Not yet supported"
+ @revisions.delete_at(index)
+ self
+ end
- processed = []
-
- def convert(root) #:nodoc:
+ #
+ # Looking for an object present at a specified file offset.
+ #
+ def get_object_by_offset(offset) #:nodoc:
+ self.indirect_objects.find { |obj| obj.file_offset == offset }
+ end
- replaced = []
- if root.is_a?(Dictionary) or root.is_a?(Array)
-
- root.each { |obj|
- convert(obj)
- }
+ #
+ # Remove an object.
+ #
+ def delete_object(no, generation = 0)
+ case no
+ when Reference
+ target = no
+ when ::Integer
+ target = Reference.new(no, generation)
+ else
+ raise TypeError, "Invalid parameter type : #{no.class}"
+ end
- root.map! { |obj|
- if obj.is_a?(Reference)
- target = obj.solve
- # Streams can't be direct objects
- if target.is_a?(Stream)
- obj
- else
- replaced << obj
- target
- end
+ @revisions.each do |rev|
+ rev.body.delete(target)
+ end
+ end
+
+ #
+ # Search for an indirect object in the document.
+ # _no_:: Reference or number of the object.
+ # _generation_:: Object generation.
+ #
+ def get_object(no, generation = 0, use_xrefstm: true) #:nodoc:
+ case no
+ when Reference
+ target = no
+ when ::Integer
+ target = Reference.new(no, generation)
+ when Origami::Object
+ return no
else
- obj
+ raise TypeError, "Invalid parameter type : #{no.class}"
end
- }
-
+
+ #
+ # Search through accessible indirect objects.
+ #
+ @revisions.reverse_each do |rev|
+ return rev.body[target] if rev.body.include?(target)
+ end
+
+ #
+ # Search through xref sections.
+ #
+ @revisions.reverse_each do |rev|
+ next unless rev.has_xreftable?
+
+ xref = rev.xreftable.find(target.refno)
+ next if xref.nil? or xref.free?
+
+ # Try loading the object if it is not present.
+ object = load_object_at_offset(rev, xref.offset)
+ return object unless object.nil?
+ end
+
+ return nil unless use_xrefstm
+
+ # Search through xref streams.
+ @revisions.reverse_each do |rev|
+ next unless rev.has_xrefstm?
+
+ xrefstm = rev.xrefstm
+
+ xref = xrefstm.find(target.refno)
+ next if xref.nil?
+
+ #
+ # We found a matching XRef.
+ #
+ if xref.is_a?(XRefToCompressedObj)
+ objstm = get_object(xref.objstmno, 0, use_xrefstm: use_xrefstm)
+
+ object = objstm.extract_by_index(xref.index)
+ if object.is_a?(Origami::Object) and object.no == target.refno
+ return object
+ else
+ return objstm.extract(target.refno)
+ end
+ elsif xref.is_a?(XRef)
+ object = load_object_at_offset(rev, xref.offset)
+ return object unless object.nil?
+ end
+ end
+
+ #
+ # Lastly search directly into Object streams (might be very slow).
+ #
+ @revisions.reverse_each do |rev|
+ stream = rev.objects.find{|obj| obj.is_a?(ObjectStream) and obj.include?(target.refno)}
+ return stream.extract(target.refno) unless stream.nil?
+ end
+
+ nil
end
+ alias [] get_object
- replaced
- end
+ #
+ # Casts a PDF object into another object type.
+ # The target type must be a subtype of the original type.
+ #
+ def cast_object(reference, type, parser = nil) #:nodoc:
+ @revisions.each do |rev|
+ if rev.body.include?(reference) and type < rev.body[reference].class
+ rev.body[reference] = rev.body[reference].cast_to(type, parser)
- @revisions.each do |revision|
- revision.objects.each do |obj|
- processed.concat(convert(obj))
+ rev.body[reference]
+ else
+ nil
+ end
+ end
end
- end
- end
-
- #
- # Converts a logical PDF view into a physical view ready for writing.
- #
- def physicalize
-
- #
- # Indirect objects are added to the revision and assigned numbers.
- #
- def build(obj, revision) #:nodoc:
+ #
+ # Returns a new number/generation for future object.
+ #
+ def allocate_new_object_number
+ no = 1
+ # Deprecated number allocation policy (first available)
+ #no = no + 1 while get_object(no)
+
+ objset = self.indirect_objects
+ self.indirect_objects.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
+ objstm.each{|obj| objset << obj}
+ end
+
+ allocated = objset.collect{|obj| obj.no}.compact
+ no = allocated.max + 1 unless allocated.empty?
+
+ [ no, 0 ]
+ end
+
#
- # Finalize any subobjects before building the stream.
+ # Mark the document as complete.
+ # No more objects needs to be fetched by the parser.
#
- if obj.is_a?(ObjectStream)
- obj.each do |subobj|
- build(subobj, revision)
- end
+ def loaded!
+ @loaded = true
end
-
- obj.pre_build
- if obj.is_a?(Dictionary) or obj.is_a?(Array)
-
- obj.map! do |subobj|
- if subobj.is_indirect?
- if get_object(subobj.reference)
- subobj.reference
+ ##########################
+ private
+ ##########################
+
+ #
+ # Load an object from its given file offset.
+ # The document must have an associated Parser.
+ #
+ def load_object_at_offset(revision, offset)
+ return nil if @loaded or @parser.nil?
+ pos = @parser.pos
+
+ begin
+ object = @parser.parse_object(offset)
+ return nil if object.nil?
+
+ if self.is_a?(Encryption::EncryptedDocument)
+ case object
+ when String
+ object.extend(Encryption::EncryptedString)
+ object.decrypted = false
+ when Stream
+ object.extend(Encryption::EncryptedStream)
+ object.decrypted = false
+ when Dictionary, Array
+ object.strings_cache.each do |string|
+ string.extend(Encryption::EncryptedString)
+ string.decrypted = false
+ end
+ end
+ end
+
+ add_to_revision(object, revision)
+ ensure
+ @parser.pos = pos
+ end
+
+ object
+ end
+
+ #
+ # Force the loading of all objects in the document.
+ #
+ def load_all_objects
+ return if @loaded or @parser.nil?
+
+ @revisions.each do |revision|
+ if revision.has_xreftable?
+ xrefs = revision.xreftable
+ elsif revision.has_xrefstm?
+ xrefs = revision.xrefstm
else
- ref = add_to_revision(subobj, revision)
- build(subobj, revision)
- ref
+ next
end
- else
- subobj
- end
+
+ xrefs.each_with_number do |_, no|
+ self.get_object(no)
+ end
end
-
- obj.each do |subobj|
- build(subobj, revision)
- end
-
- elsif obj.is_a?(Stream)
- build(obj.dictionary, revision)
+
+ @loaded = true
end
- obj.post_build
-
- end
-
- indirect_objects_by_rev.each do |obj, revision|
- build(obj, revision)
- end
-
- self
- end
+ #
+ # Compute and update XRef::Section for each Revision.
+ #
+ def rebuild_xrefs
+ size = 0
+ startxref = @header.to_s.size
- #
- # Returns the final binary representation of the current document.
- #
- def output(params = {})
-
- has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}
+ @revisions.each do |revision|
+ revision.objects.each do |object|
+ startxref += object.to_s.size
+ end
- options =
- {
- :rebuildxrefs => true,
- :noindent => false,
- :obfuscate => false,
- :use_xrefstm => has_objstm,
- :use_xreftable => (not has_objstm),
- :up_to_revision => @revisions.size
- }
- options.update(params)
+ size += revision.body.size
+ revision.xreftable = build_xrefs(revision.objects)
- options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size
+ revision.trailer ||= Trailer.new
+ revision.trailer.Size = size + 1
+ revision.trailer.startxref = startxref
- # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
- if options[:use_xrefstm] == options[:use_xreftable]
- options[:use_xrefstm] = has_objstm
- options[:use_xreftable] = (not has_objstm)
- end
+ startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
+ end
- # Get trailer dictionary
- trailer_info = get_trailer_info
- if trailer_info.nil?
- raise InvalidPDFError, "No trailer information found"
- end
- trailer_dict = trailer_info.dictionary
-
- prev_xref_offset = nil
- xrefstm_offset = nil
- xreftable_offset = nil
-
- # Header
- bin = ""
- bin << @header.to_s
-
- # For each revision
- @revisions[0, options[:up_to_revision]].each do |rev|
-
- # Create xref table/stream.
- if options[:rebuildxrefs] == true
- lastno_table, lastno_stm = 0, 0
- brange_table, brange_stm = 0, 0
-
- xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
- xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
+ self
+ end
- if options[:use_xreftable] == true
- xrefsection = XRef::Section.new
- end
+ #
+ # This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
+ # * Allocates objects references.
+ # * Sets some objects missing required values.
+ #
+ def compile(options = {})
- if options[:use_xrefstm] == true
- xrefstm = rev.xrefstm || XRefStream.new
- if xrefstm == rev.xrefstm
- xrefstm.clear
+ load_all_objects unless @loaded
+
+ #
+ # A valid document must have at least one page.
+ #
+ append_page if pages.empty?
+
+ #
+ # Allocates object numbers and creates references.
+ # Invokes object finalization methods.
+ #
+ if self.is_a?(Encryption::EncryptedDocument)
+ physicalize(options)
else
- add_to_revision(xrefstm, rev)
+ physicalize
end
- end
+
+ #
+ # Sets the PDF version header.
+ #
+ version, level = version_required
+ @header.major_version = version[0,1].to_i
+ @header.minor_version = version[2,1].to_i
+
+ set_extension_level(version, level) if level > 0
+
+ self
end
-
- objset = rev.objects
-
- objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
- objset.concat objstm.objects
- end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true
- # For each object, in number order
- objset.sort.each do |obj|
-
- # Create xref entry.
- if options[:rebuildxrefs] == true
-
- # Adding subsections if needed
- if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
- xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
+ #
+ # Cleans the document from its references.
+ # Indirects objects are made direct whenever possible.
+ # TODO: Circuit-checking to avoid infinite induction
+ #
+ def logicalize #:nodoc:
+ raise NotImplementedError
- xrefs_table.clear
- brange_table = obj.no
+ processed = []
+
+ convert = -> (root) do
+ replaced = []
+ if root.is_a?(Dictionary) or root.is_a?(Array)
+ root.each do |obj|
+ convert[obj]
+ end
+
+ root.map! do |obj|
+ if obj.is_a?(Reference)
+ target = obj.solve
+ # Streams can't be direct objects
+ if target.is_a?(Stream)
+ obj
+ else
+ replaced << obj
+ target
+ end
+ else
+ obj
+ end
+ end
+ end
+
+ replaced
end
- if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
- xrefs_stm.each do |xref| xrefstm << xref end
- xrefstm.Index ||= []
- xrefstm.Index << brange_stm << xrefs_stm.length
- xrefs_stm.clear
- brange_stm = obj.no
+ @revisions.each do |revision|
+ revision.objects.each do |obj|
+ processed.concat(convert[obj])
+ end
end
+ end
- # Process embedded objects
- if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
- index = obj.parent.index(obj.no)
-
- xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
-
- lastno_stm = obj.no
- else
- xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
- xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)
+ #
+ # Converts a logical PDF view into a physical view ready for writing.
+ #
+ def physicalize
- lastno_table = lastno_stm = obj.no
+ #
+ # Indirect objects are added to the revision and assigned numbers.
+ #
+ build = -> (obj, revision) do
+ #
+ # Finalize any subobjects before building the stream.
+ #
+ if obj.is_a?(ObjectStream)
+ obj.each do |subobj|
+ build.call(subobj, revision)
+ end
+ end
+
+ obj.pre_build
+
+ if obj.is_a?(Dictionary) or obj.is_a?(Array)
+
+ obj.map! do |subobj|
+ if subobj.indirect?
+ if get_object(subobj.reference)
+ subobj.reference
+ else
+ ref = add_to_revision(subobj, revision)
+ build.call(subobj, revision)
+ ref
+ end
+ else
+ subobj
+ end
+ end
+
+ obj.each do |subobj|
+ build.call(subobj, revision)
+ end
+
+ elsif obj.is_a?(Stream)
+ build.call(obj.dictionary, revision)
+ end
+
+ obj.post_build
end
+ indirect_objects_by_rev.each do |obj, revision|
+ build.call(obj, revision)
end
-
- if obj.parent == obj or not obj.parent.is_a?(ObjectStream)
-
- # Finalize XRefStm
- if options[:rebuildxrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
- xrefstm_offset = bin.size
-
- xrefs_stm.each do |xref| xrefstm << xref end
- xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
- if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
- xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
- end
+ self
+ end
- xrefstm.Index ||= []
- xrefstm.Index << brange_stm << xrefs_stm.size
-
- xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
- xrefstm.Prev = prev_xref_offset
- rev.trailer.dictionary = nil
+ #
+ # Returns the final binary representation of the current document.
+ #
+ def output(params = {})
- add_to_revision(xrefstm, rev)
+ has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}
- xrefstm.pre_build
- xrefstm.post_build
+ options =
+ {
+ rebuild_xrefs: true,
+ noindent: false,
+ obfuscate: false,
+ use_xrefstm: has_objstm,
+ use_xreftable: (not has_objstm),
+ up_to_revision: @revisions.size
+ }
+ options.update(params)
+
+ options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size
+
+ # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
+ if options[:use_xrefstm] == options[:use_xreftable]
+ options[:use_xrefstm] = has_objstm
+ options[:use_xreftable] = (not has_objstm)
end
- # Output object code
- if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
- bin << obj.to_s(0)
- else
- bin << obj.to_s
+ # Get trailer dictionary
+ trailer_info = get_trailer_info
+ raise InvalidPDFError, "No trailer information found" if trailer_info.nil?
+ trailer_dict = trailer_info.dictionary
+
+ prev_xref_offset = nil
+ xrefstm_offset = nil
+
+ # Header
+ bin = ""
+ bin << @header.to_s
+
+ # For each revision
+ @revisions[0, options[:up_to_revision]].each do |rev|
+
+ # Create xref table/stream.
+ if options[:rebuild_xrefs] == true
+ lastno_table, lastno_stm = 0, 0
+ brange_table, brange_stm = 0, 0
+
+ xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
+ xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
+
+ if options[:use_xreftable] == true
+ xrefsection = XRef::Section.new
+ end
+
+ if options[:use_xrefstm] == true
+ xrefstm = rev.xrefstm || XRefStream.new
+ if xrefstm == rev.xrefstm
+ xrefstm.clear
+ else
+ add_to_revision(xrefstm, rev)
+ end
+ end
+ end
+
+ objset = rev.objects
+
+ objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
+ objset.concat objstm.objects
+ end if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true
+
+ previous_obj = nil
+
+ # For each object, in number order
+ # Move any XRefStream to the end of the revision.
+ objset.sort_by {|obj| [obj.is_a?(XRefStream) ? 1 : 0, obj.no, obj.generation] }
+ .each do |obj|
+
+ # Ensures that every object has a unique reference number.
+ # Duplicates should never happen in a well-formed revision and will cause breakage of xrefs.
+ if previous_obj and previous_obj.reference == obj.reference
+ raise InvalidPDFError, "Duplicate object detected, reference #{obj.reference}"
+ else
+ previous_obj = obj
+ end
+
+ # Create xref entry.
+ if options[:rebuild_xrefs] == true
+
+ # Adding subsections if needed
+ if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
+ xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
+
+ xrefs_table.clear
+ brange_table = obj.no
+ end
+
+ if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
+ xrefs_stm.each do |xref| xrefstm << xref end
+ xrefstm.Index ||= []
+ xrefstm.Index << brange_stm << xrefs_stm.length
+
+ xrefs_stm.clear
+ brange_stm = obj.no
+ end
+
+ # Process embedded objects
+ if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
+ index = obj.parent.index(obj.no)
+
+ xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
+
+ lastno_stm = obj.no
+ else
+ xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
+ xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)
+
+ lastno_table = lastno_stm = obj.no
+ end
+ end
+
+ if obj.parent == obj or not obj.parent.is_a?(ObjectStream)
+
+ # Finalize XRefStm
+ if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
+ xrefstm_offset = bin.size
+
+ xrefs_stm.each do |xref| xrefstm << xref end
+
+ xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
+ if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
+ xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
+ end
+
+ xrefstm.Index ||= []
+ xrefstm.Index << brange_stm << xrefs_stm.size
+
+ xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
+ xrefstm.Prev = prev_xref_offset
+ rev.trailer.dictionary = nil
+
+ add_to_revision(xrefstm, rev)
+
+ xrefstm.pre_build
+ xrefstm.post_build
+ end
+
+ # Output object code
+ if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
+ bin << obj.to_s(indent: 0)
+ else
+ bin << obj.to_s
+ end
+ end
+ end # end each object
+
+ rev.trailer ||= Trailer.new
+
+ # XRef table
+ if options[:rebuild_xrefs] == true
+
+ if options[:use_xreftable] == true
+ table_offset = bin.size
+
+ xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
+ rev.xreftable = xrefsection
+
+ rev.trailer.dictionary = trailer_dict
+ rev.trailer.Size = objset.size + 1
+ rev.trailer.Prev = prev_xref_offset
+
+ rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
+ end
+
+ startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
+ rev.trailer.startxref = prev_xref_offset = startxref
+
+ end
+
+ # Trailer
+ bin << rev.xreftable.to_s if options[:use_xreftable] == true
+ bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)
+
+ end # end each revision
+
+ bin
+ end
+
+ #
+ # Instanciates basic structures required for a valid PDF file.
+ #
+ def init
+ catalog = (self.Catalog = (trailer_key(:Root) || Catalog.new))
+ catalog.Pages = PageTreeNode.new.set_indirect(true)
+ @revisions.last.trailer.Root = catalog.reference
+
+ @loaded = true
+
+ self
+ end
+
+ def filesize #:nodoc:
+ output(rebuild_xrefs: false).size
+ end
+
+ def version_required #:nodoc:
+ max = [ 1.0, 0 ]
+ @revisions.each do |revision|
+ revision.objects.each do |object|
+ current = object.version_required
+ max = current if (current <=> max) > 0
+ end
end
- end
+
+ max[0] = max[0].to_s
+
+ max
end
-
- rev.trailer ||= Trailer.new
-
- # XRef table
- if options[:rebuildxrefs] == true
-
- if options[:use_xreftable] == true
- table_offset = bin.size
-
- xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
- rev.xreftable = xrefsection
-
- rev.trailer.dictionary = trailer_dict
- rev.trailer.Size = objset.size + 1
- rev.trailer.Prev = prev_xref_offset
- rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
- end
+ def indirect_objects_by_rev #:nodoc:
+ @revisions.inject([]) do |set,rev|
+ objset = rev.objects
+ set.concat(objset.zip(::Array.new(objset.length, rev)))
+ end
+ end
- startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
- rev.trailer.startxref = prev_xref_offset = startxref
+ #
+ # Compute and update XRef::Section for each Revision.
+ #
+ def rebuild_dummy_xrefs #:nodoc
- end # end each rev
-
- # Trailer
- bin << rev.xreftable.to_s if options[:use_xreftable] == true
- bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)
-
- end
-
- bin
- end
-
- #
- # Instanciates basic structures required for a valid PDF file.
- #
- def init
- catalog = (self.Catalog = (get_doc_attr(:Root) || Catalog.new))
- catalog.Pages = PageTreeNode.new.set_indirect(true)
- @revisions.last.trailer.Root = catalog.reference
+ build_dummy_xrefs = -> (objects) do
+ lastno = 0
+ brange = 0
- self
- end
-
- def filesize #:nodoc:
- output(:rebuildxrefs => false).size
- end
-
- def version_required #:nodoc:
-
- max = [ 1.0, 0 ]
- @revisions.each { |revision|
- revision.objects.each { |object|
- current = object.pdf_version_required
- max = current if (current <=> max) > 0
- }
- }
- max[0] = max[0].to_s
-
- max
- end
-
- def indirect_objects_by_rev #:nodoc:
- @revisions.inject([]) do |set,rev|
- objset = rev.objects
- set.concat(objset.zip(::Array.new(objset.length, rev)))
- end
- end
-
- #
- # Compute and update XRef::Section for each Revision.
- #
- def rebuild_dummy_xrefs #:nodoc
-
- def build_dummy_xrefs(objects)
-
- lastno = 0
- brange = 0
-
- xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
+ xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
- xrefsection = XRef::Section.new
- objects.sort.each { |object|
- if (object.no - lastno).abs > 1
+ xrefsection = XRef::Section.new
+ objects.sort.each do |object|
+ if (object.no - lastno).abs > 1
+ xrefsection << XRef::Subsection.new(brange, xrefs)
+ brange = object.no
+ xrefs.clear
+ end
+
+ xrefs << XRef.new(0, 0, XRef::FREE)
+
+ lastno = object.no
+ end
+
+ xrefsection << XRef::Subsection.new(brange, xrefs)
+
+ xrefsection
+ end
+
+ size = 0
+ startxref = @header.to_s.size
+
+ @revisions.each do |revision|
+ revision.objects.each do |object|
+ startxref += object.to_s.size
+ end
+
+ size += revision.body.size
+ revision.xreftable = build_dummy_xrefs.call(revision.objects)
+
+ revision.trailer ||= Trailer.new
+ revision.trailer.Size = size + 1
+ revision.trailer.startxref = startxref
+
+ startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
+ end
+
+ self
+ end
+
+ #
+ # Build a xref section from a set of objects.
+ #
+ def build_xrefs(objects) #:nodoc:
+
+ lastno = 0
+ brange = 0
+
+ xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
+
+ xrefsection = XRef::Section.new
+ objects.sort.each do |object|
+ if (object.no - lastno).abs > 1
+ xrefsection << XRef::Subsection.new(brange, xrefs)
+ brange = object.no
+ xrefs.clear
+ end
+
+ xrefs << XRef.new(get_object_offset(object.no, object.generation), object.generation, XRef::USED)
+
+ lastno = object.no
+ end
+
xrefsection << XRef::Subsection.new(brange, xrefs)
- brange = object.no
- xrefs.clear
- end
-
- xrefs << XRef.new(0, 0, XRef::FREE)
- lastno = object.no
- }
-
- xrefsection << XRef::Subsection.new(brange, xrefs)
-
- xrefsection
- end
-
- size = 0
- startxref = @header.to_s.size
-
- @revisions.each do |revision|
- revision.objects.each do |object|
- startxref += object.to_s.size
+ xrefsection
end
-
- size += revision.body.size
- revision.xreftable = build_dummy_xrefs(revision.objects)
-
- revision.trailer ||= Trailer.new
- revision.trailer.Size = size + 1
- revision.trailer.startxref = startxref
-
- startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
- end
-
- self
- end
-
- #
- # Build a xref section from a set of objects.
- #
- def buildxrefs(objects) #:nodoc:
-
- lastno = 0
- brange = 0
-
- xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
-
- xrefsection = XRef::Section.new
- objects.sort.each { |object|
- if (object.no - lastno).abs > 1
- xrefsection << XRef::Subsection.new(brange, xrefs)
- brange = object.no
- xrefs.clear
+
+ def delete_revision(ngen) #:nodoc:
+ @revisions.delete_at[ngen]
end
-
- xrefs << XRef.new(get_object_offset(object.no, object.generation), object.generation, XRef::USED)
- lastno = object.no
- }
-
- xrefsection << XRef::Subsection.new(brange, xrefs)
-
- xrefsection
- end
-
- def delete_revision(ngen) #:nodoc:
- @revisions.delete_at[ngen]
- end
-
- def get_revision(ngen) #:nodoc:
- @revisions[ngen].body
- end
-
- def get_object_offset(no,generation) #:nodoc:
- objectoffset = @header.to_s.size
-
- @revisions.each do |revision|
- revision.objects.sort.each do |object|
- if object.no == no and object.generation == generation then return objectoffset
- else
- objectoffset += object.to_s.size
- end
+ def get_revision(ngen) #:nodoc:
+ @revisions[ngen].body
end
-
- objectoffset += revision.xreftable.to_s.size
- objectoffset += revision.trailer.to_s.size
- end
-
- nil
- end
- end
+ def get_object_offset(no,generation) #:nodoc:
+ objectoffset = @header.to_s.size
-end
+ @revisions.each do |revision|
+ revision.objects.sort.each do |object|
+ if object.no == no and object.generation == generation then return objectoffset
+ else
+ objectoffset += object.to_s.size
+ end
+ end
+ objectoffset += revision.xreftable.to_s.size
+ objectoffset += revision.trailer.to_s.size
+ end
+
+ nil
+ end
+ end
+
+end