# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2023 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see http://www.gnu.org/licenses/.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at https://gettalong.at/hexapdf/.
#++
require 'hexapdf/error'
require 'hexapdf/parser'
require 'hexapdf/revision'
require 'hexapdf/type/trailer'
module HexaPDF
# Manages the revisions of a PDF document.
#
# A PDF document has one revision when it is created. Later, new revisions are added when changes
# are made. This allows for adding information/content to a PDF file without changing the original
# content.
#
# The order of the revisions is important. In HexaPDF the oldest revision always has index 0 and
# the newest revision the highest index. This is also the order in which the revisions get
# written.
#
# *Important*: It is possible to manipulate the individual revisions and their objects oneself but
# this should only be done if one is familiar with the inner workings of HexaPDF. Otherwise it is
# best to use the convenience methods of this class to create, access or delete indirect objects.
#
# See: PDF2.0 s7.5.6, HexaPDF::Revision
class Revisions
class << self
# Loads all revisions for the document from the given IO and returns the created Revisions
# object.
#
# If the +io+ object is +nil+, an empty Revisions object is returned.
#
# The cross-reference sections are followed from the one found via the startxref offset
# backwards through the /Prev entries of the trailers, i.e. from newest to oldest. Each loaded
# revision is prepended so that the oldest revision ends up at index 0. An offset that was
# already processed ends the loop, so a circular /Prev chain cannot lead to an endless loop.
#
# If a HexaPDF::MalformedPDFError is raised while loading and the parser provides a
# reconstructed revision, that revision is appended as newest revision. Otherwise the error is
# re-raised.
def from_io(document, io)
  return new(document) if io.nil?

  parser = Parser.new(io, document)
  object_loader = lambda {|xref_entry| parser.load_object(xref_entry) }

  revisions = []
  begin
    offset = parser.startxref_offset
    seen_xref_offsets = {}
    # Offset of the revision that has to be merged with the next newer, already loaded revision.
    # It is declared outside of the loop because it may be set in one iteration and is only
    # acted upon in a later one.
    merge_revision = nil

    while offset && !seen_xref_offsets.key?(offset)
      # PDF2.0 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
      # should be direct. Adobe's POV is followed here. Same with :XRefStm.
      xref_section, trailer = parser.load_revision(offset)
      seen_xref_offsets[offset] = true
      # Remember the next offset now because the trailer may be replaced below.
      prev_offset = trailer[:Prev]

      stm = trailer[:XRefStm]
      if stm && !seen_xref_offsets.key?(stm)
        # A trailer with /XRefStm but without /Prev must not be compared (nil has no #>).
        if xref_section.max_oid == 0 && prev_offset && prev_offset > stm
          # Revision is completely empty, with xref stream in previous revision
          merge_revision = prev_offset
        end
        stm_xref_section, = parser.load_revision(stm)
        stm_xref_section.merge!(xref_section)
        xref_section = stm_xref_section
        seen_xref_offsets[stm] = true
      end

      merge_revision = offset if parser.linearized? && !trailer.key?(:Prev)

      # Merging needs an already loaded newer revision. Without one (e.g. a file marked as
      # linearized that has only a single cross-reference section) the revision is used as is.
      if merge_revision == offset && !revisions.empty?
        newer_revision = revisions.shift
        xref_section.merge!(newer_revision.xref_section)
        trailer = newer_revision.trailer
      end
      offset = prev_offset

      revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
                                     xref_section: xref_section, loader: object_loader))
    end
  rescue HexaPDF::MalformedPDFError
    raise unless (reconstructed_revision = parser.reconstructed_revision)
    unless revisions.empty?
      reconstructed_revision.trailer.data.value = revisions.last.trailer.data.value
    end
    revisions << reconstructed_revision
  end

  # Use version 1.0 if the version from the file header cannot be determined.
  version = begin
    parser.file_header_version
  rescue StandardError
    '1.0'
  end
  document.version = version

  new(document, initial_revisions: revisions, parser: parser)
end
end
# Provides the Enumerable methods; they are based on #each.
include Enumerable
# The Parser instance used for reading the initial revisions.
#
# This is +nil+ if no parser was specified when the Revisions object was created.
attr_reader :parser
# Creates a new revisions object for the given PDF document.
#
# Options:
#
# initial_revisions::
#     An array with the revisions the new object should start with. If this option is not
#     specified, a single empty revision is added via #add.
#
# parser::
#     The parser with which the initial revisions were read. If this option is not specified
#     even though the document was read from an IO stream, some parts may not work, like
#     incremental writing.
def initialize(document, initial_revisions: nil, parser: nil)
  @document = document
  @parser = parser
  @revisions = []
  # The given array is copied into the internal list, it is not used directly.
  return @revisions.concat(initial_revisions) if initial_revisions

  add
end
# Returns the object number that should be used for the next newly added object.
#
# This is the highest value reported by the +next_free_oid+ method of the revisions.
def next_oid
  @revisions.max_by(&:next_free_oid)&.next_free_oid
end
# :call-seq:
#   revisions.object(ref)    -> obj or nil
#   revisions.object(oid)    -> obj or nil
#
# Returns the current version of the indirect object for the given exact reference or for the
# given object number.
#
# The revisions are asked from newest to oldest and the first object found is returned.
#
# For references to unknown objects, +nil+ is returned but free objects are represented by a
# PDF Null object, not by +nil+!
#
# See: PDF2.0 s7.3.9
def object(ref)
  @revisions.reverse_each do |revision|
    found = revision.object(ref)
    return found if found
  end
  nil
end
# :call-seq:
#   revisions.object?(ref)    -> true or false
#   revisions.object?(oid)    -> true or false
#
# Returns +true+ if at least one of the revisions contains an indirect object for the given
# exact reference or for the given object number, and +false+ otherwise.
#
# Since *all* revisions are taken into account, this method might return +true+ for a reference
# for which #object returns +nil+.
def object?(ref)
  @revisions.each do |revision|
    return true if revision.object?(ref)
  end
  false
end
# :call-seq:
#   revisions.add_object(object)     -> object
#
# Adds the given HexaPDF::Object to the current revision and returns it.
#
# If +object+ is a direct object, an object number is automatically assigned.
#
# If +object+ is indirect and the current revision already has an object with the same object
# number, +object+ is returned as is when both have the same data; otherwise a HexaPDF::Error
# is raised.
def add_object(obj)
  if obj.indirect?
    existing = current.object(obj.oid)
    if existing
      return obj if existing.data == obj.data

      raise HexaPDF::Error, "Can't add object because there is already " \
        "an object with object number #{obj.oid}"
    end
  else
    obj.oid = next_oid
  end
  current.add(obj)
end
# :call-seq:
#   revisions.delete_object(ref)
#   revisions.delete_object(oid)
#
# Deletes the indirect object specified by an exact reference or by an object number.
#
# The revisions are checked from newest to oldest and the object is only deleted from the first
# revision that contains it.
def delete_object(ref)
  @revisions.reverse_each do |revision|
    next unless revision.object?(ref)

    revision.delete(ref)
    break
  end
end
# :call-seq:
#   revisions.each_object(only_current: true, only_loaded: false) {|obj| block }      -> revisions
#   revisions.each_object(only_current: true, only_loaded: false) {|obj, rev| block } -> revisions
#   revisions.each_object(only_current: true, only_loaded: false)                     -> Enumerator
#
# Yields every object and, if the block takes two arguments, also the revision it is in.
#
# The revisions are processed from newest to oldest.
#
# If +only_loaded+ is +true+, only the already loaded objects of the PDF document are yielded.
# This does only matter when the document instance was created from an existing PDF document.
#
# By default, only the current version of each object is yielded, i.e. each object number is
# yielded exactly once. If the +only_current+ option is +false+, all stored objects are yielded,
# not only the current version of each object.
#
# The +only_current+ option can make a difference because the document can contain multiple
# revisions:
#
# * Multiple revisions may contain objects with the same object and generation numbers, e.g.
#   two (different) objects with oid/gen [3,0].
#
# * Additionally, there may also be objects with the same object number but different
#   generation numbers in different revisions, e.g. one object with oid/gen [3,0] and one with
#   oid/gen [3,1].
#
# *Note* that setting +only_current+ to +false+ is normally not necessary and should not be
# done. If it is still done, one has to take care to avoid an invalid document state.
def each_object(only_current: true, only_loaded: false, &block)
  unless block
    return to_enum(:each_object, only_current: only_current, only_loaded: only_loaded)
  end

  with_revision = (block.arity == 2)
  seen_oids = {}
  @revisions.reverse_each do |revision|
    revision.each(only_loaded: only_loaded) do |object|
      next if only_current && seen_oids.key?(object.oid)

      if with_revision
        yield(object, revision)
      else
        yield(object)
      end
      # Recorded after yielding so that older versions of this object number are skipped.
      seen_oids[object.oid] = true
    end
  end
  self
end
# Returns the current revision, i.e. the last one in the list of revisions.
#
# *Note*: This method should only be used if one is familiar with the inner workings of HexaPDF
# and the PDF specification.
def current
  @revisions[-1]
end
# Returns a list of all revisions.
#
# The returned array is the internally used one and not a copy, so modifying it changes the
# revisions of this object.
#
# *Note*: This method should only be used if one is familiar with the inner workings of HexaPDF
# and the PDF specification.
def all
@revisions
end
# :call-seq:
#   revisions.each {|rev| block }   -> revisions
#   revisions.each                  -> Enumerator
#
# Iterates over all revisions from oldest to current one. Without a block an Enumerator is
# returned.
#
# *Note*: This method should only be used if one is familiar with the inner workings of HexaPDF
# and the PDF specification.
def each(&block)
  if block_given?
    @revisions.each(&block)
    self
  else
    to_enum(__method__)
  end
end
# Adds a new empty revision to the document and returns it.
#
# The trailer of the new revision is a copy of the trailer value of the current revision
# without the /Prev and /XRefStm entries. If there is no revision yet, an empty trailer is
# used.
#
# *Note*: This method should only be used if one is familiar with the inner workings of HexaPDF
# and the PDF specification.
def add
  trailer = {}
  unless @revisions.empty?
    trailer = current.trailer.value.dup
    %i[Prev XRefStm].each {|key| trailer.delete(key) }
  end
  revision = Revision.new(@document.wrap(trailer, type: :XXTrailer))
  @revisions << revision
  revision
end
# :call-seq:
#   revisions.merge(range = 0..-1)    -> revisions
#
# Merges the revisions specified by the given range into one. Objects from newer revisions
# overwrite those from older ones.
#
# The merge is done pairwise from newest to oldest: the trailer value of the older revision is
# replaced by that of the newer one and every object of the newer revision whose data differs
# is put into the older revision. Afterwards all revisions of the range except the first are
# removed.
def merge(range = 0..-1)
  selected = @revisions[range]
  selected.reverse_each.each_cons(2) do |newer, older|
    older.trailer.value.replace(newer.trailer.value)
    newer.each do |obj|
      differs = (obj.data != older.object(obj)&.data)
      next unless differs

      older.delete(obj.oid, mark_as_free: false)
      older.add(obj)
    end
  end
  selected.drop(1).each {|merged| @revisions.delete(merged) }
  self
end
end
end