# -*- encoding: utf-8; frozen_string_literal: true -*- # #-- # This file is part of HexaPDF. # # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby # Copyright (C) 2014-2020 Thomas Leitner # # HexaPDF is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License version 3 as # published by the Free Software Foundation with the addition of the # following permission added to Section 15 as permitted in Section 7(a): # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON # INFRINGEMENT OF THIRD PARTY RIGHTS. # # HexaPDF is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public # License for more details. # # You should have received a copy of the GNU Affero General Public License # along with HexaPDF. If not, see . # # The interactive user interfaces in modified source and object code # versions of HexaPDF must display Appropriate Legal Notices, as required # under Section 5 of the GNU Affero General Public License version 3. # # In accordance with Section 7(b) of the GNU Affero General Public # License, a covered work must retain the producer line in every PDF that # is created or manipulated using HexaPDF. # # If the GNU Affero General Public License doesn't fit your need, # commercial licenses are available at . #++ require 'hexapdf/error' require 'hexapdf/stream' require 'hexapdf/xref_section' require 'hexapdf/type/trailer' module HexaPDF module Type # Represents PDF type XRef, cross-reference streams. # # A cross-reference stream is used as a more compact representation for an cross-reference # section and trailer dictionary. The trailer dictionary is incorporated into the stream # dictionary and the cross-reference section entries are stored in the stream itself, # compressed to save space. # # == How are Cross-reference Streams Used? # # Cross-reference stream objects are only used when parsing or writing a PDF document. # # When a file is read and a cross-reference stream is found, it is loaded and its information is # stored in a HexaPDF::Revision object. So from a user's perspective nothing changes when a # cross-reference stream instead of a cross-reference section and trailer is encountered. # # This also means that all information stored in a cross-reference stream between parsing and # writing is discarded when the PDF document gets written! # # Upon writing a revision it is checked whether that revision contains a cross-reference # stream object. If it does the cross-reference stream object is updated with the # cross-reference section and trailer information and then written. Otherwise a normal # cross-reference section plus trailer are written. # # See: PDF1.7 s7.5.8 class XRefStream < HexaPDF::Stream define_type :XRef define_field :Type, type: Symbol, default: type, required: true, indirect: false, version: '1.5' # Size is not required because it will be auto-filled before the object is written define_field :Size, type: Integer, indirect: false define_field :Index, type: PDFArray, indirect: false define_field :Prev, type: Integer, indirect: false # W is not required because it will be auto-filled on #update_with_xref_section_and_trailer define_field :W, type: PDFArray, indirect: false # Returns an XRefSection that represents the content of this cross-reference stream. # # Each invocation returns a new XRefSection object based on the current data in the # associated stream and dictionary. def xref_section index = self[:Index] || [0, self[:Size]] parse_xref_section(index, self[:W]) end # Returns a hash with the entries that represent the file trailer part of the # cross-reference stream's dictionary. # # See: Type::Trailer def trailer Trailer.each_field.with_object({}) do |(name, _data), hash| hash[name] = value[name] if key?(name) end end # Makes this cross-reference stream represent the data in the given HexaPDF::XRefSection and # Type::Trailer. # # The +xref_section+ needs to contain an entry for this cross-reference stream and it is # necessary that this entry is the one with the highest byte position (for calculating the # correct /W entry). # # The given cross-reference section is *not* stored but only used to rewrite the associated # stream to reflect the cross-reference section. The dictionary is updated with the # information from the trailer and the needed entries for the cross-reference section. # # If there are changes to the cross-reference section or trailer, this method has to be # invoked again. def update_with_xref_section_and_trailer(xref_section, trailer) value.replace(trailer) value[:Type] = :XRef write_xref_section_to_stream(xref_section) set_filter(:FlateDecode, Columns: value[:W].inject(:+), Predictor: 12) end private TYPE_FREE = 0 #:nodoc: TYPE_IN_USE = 1 #:nodoc: TYPE_COMPRESSED = 2 #:nodoc: # Parses the stream and returns the resulting HexaPDF::XRefSection object. def parse_xref_section(index, w) xref = XRefSection.new data = stream start_pos = end_pos = 0 w0 = w[0] w1 = w[1] w2 = w[2] index.each_slice(2) do |first_oid, number_of_entries| first_oid.upto(first_oid + number_of_entries - 1) do |oid| # Default for first field: type 1 end_pos = start_pos + w0 type_field = (w0 == 0 ? TYPE_IN_USE : bytes_to_int(data, start_pos, end_pos)) # No default available for second field start_pos = end_pos + w1 field2 = bytes_to_int(data, end_pos, start_pos) # Default for third field is 0 for type 1, otherwise it needs to be specified! end_pos = start_pos + w2 field3 = (w2 == 0 ? 0 : bytes_to_int(data, start_pos, end_pos)) case type_field when TYPE_IN_USE xref.add_in_use_entry(oid, field3, field2) when TYPE_FREE xref.add_free_entry(oid, field3) when TYPE_COMPRESSED xref.add_compressed_entry(oid, field2, field3) else nil # Ignore entry as per PDF1.7 s7.5.8.3 end start_pos = end_pos end end xref end # Converts the bytes of the string from the start index to the end index to an integer. # # The bytes are converted in the big-endian way. def bytes_to_int(string, start_index, end_index) result = string.getbyte(start_index) start_index += 1 while start_index < end_index result = (result << 8) | string.getbyte(start_index) start_index += 1 end result end # Writes the given cross-reference section to the stream and sets the correct /W and /Index # entries for the written data. def write_xref_section_to_stream(xref_section) value[:W], pack_string = calculate_w_entry_and_pack_string(xref_section[oid, gen].pos) value[:Index] = [] stream = ''.b xref_section.each_subsection do |entries| value[:Index] << entries.first.oid << entries.length entries.each do |entry| data = if entry.in_use? [TYPE_IN_USE, entry.pos, entry.gen] elsif entry.free? [TYPE_FREE, 0, 65535] elsif entry.compressed? [TYPE_COMPRESSED, entry.objstm, entry.pos] else raise HexaPDF::Error, "Unsupported cross-reference entry #{entry}" end stream << data.pack(pack_string) end end self.stream = stream end # Returns the /W entry depending on the given maximal number for the second field as well as # the appropriate entry packing string. def calculate_w_entry_and_pack_string(max_number) middle = Math.log(max_number, 255).ceil middle = 4 if middle == 3 pack_string = "C#{'-CnNN'[middle]}n" [[1, middle, 2], pack_string] end end end end