=begin

    This file is part of Origami, PDF manipulation framework for Ruby
    Copyright (C) 2016 Guillaume Delugré.

    Origami is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Origami is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with Origami.  If not, see <http://www.gnu.org/licenses/>.

=end

module Origami

    class PDF
        #
        # Tries to strip any xrefs information off the document.
        # Revisions are walked from the most recent to the oldest; for each one
        # the cross-reference stream objects are deleted and both the xref
        # stream and the xref table of the revision are reset to nil.
        #
        def remove_xrefs
            @revisions.reverse_each do |rev|
                if rev.has_xrefstm?
                    delete_object(rev.xrefstm.reference)
                end

                # The trailer may also point to an xref stream by file offset.
                if rev.trailer.XRefStm.is_a?(Integer)
                    xrefstm = get_object_by_offset(rev.trailer.XRefStm)

                    delete_object(xrefstm.reference) if xrefstm.is_a?(XRefStream)
                end

                rev.xrefstm = rev.xreftable = nil
            end
        end
    end

    class InvalidXRefError < Error #:nodoc:
    end

    #
    # Class representing a Cross-reference information.
    #
    class XRef
        FREE = "f"
        USED = "n"
        FIRSTFREE = 65535

        # A table entry: 10-digit offset, 5-digit generation, state keyword and
        # a two-character end-of-line sequence. The named groups are read back
        # by XRef.parse.
        @@regexp = /(?<offset>\d{10}) (?<gen>\d{5}) (?<state>n|f)(\r\n| \r| \n)/

        attr_accessor :offset, :generation, :state

        #
        # Creates a new XRef.
        # _offset_:: The file _offset_ of the referenced Object.
        # _generation_:: The generation number of the referenced Object.
        # _state_:: The state of the referenced Object (FREE or USED).
        #
        def initialize(offset, generation, state)
            @offset, @generation, @state = offset, generation, state
        end

        #
        # Parses a single entry at the current position of _stream_.
        # Raises InvalidXRefError if the entry is malformed.
        #
        def self.parse(stream) #:nodoc:
            if stream.scan(@@regexp).nil?
                raise InvalidXRefError, "Invalid XRef format"
            end

            offset = stream['offset'].to_i
            generation = stream['gen'].to_i
            state = stream['state']

            XRef.new(offset, generation, state)
        end

        #
        # Returns true if the associated object is used.
        #
        def used?
            @state == USED
        end

        #
        # Returns true if the associated object is freed.
        #
        def free?
            @state == FREE
        end

        #
        # Outputs self into PDF code.
        #
        def to_s
            off = @offset.to_s.rjust(10, '0')
            gen = @generation.to_s.rjust(5, '0')

            "#{off} #{gen} #{@state}" + EOL
        end

        #
        # Encodes self as a cross-reference stream entry.
        # _type_w_:: Width in bytes of the type field.
        # _field1_w_:: Width in bytes of the offset field.
        # _field2_w_:: Width in bytes of the generation field.
        #
        # Every field is written as a big-endian, zero-padded bit string.
        # NOTE: values too large for their field are not range-checked here.
        #
        def to_xrefstm_data(type_w, field1_w, field2_w)
            # Convert byte widths to bit widths.
            type_w <<= 3
            field1_w <<= 3
            field2_w <<= 3

            # The type is formatted like the other fields so that it stays
            # right-justified when the type field is wider than one byte.
            type = ((@state == FREE) ? 0 : 1).to_s(2).rjust(type_w, '0')
            offset = @offset.to_s(2).rjust(field1_w, '0')
            generation = @generation.to_s(2).rjust(field2_w, '0')

            [ type, offset, generation ].pack("B#{type_w}B#{field1_w}B#{field2_w}")
        end

        class InvalidXRefSubsectionError < Error #:nodoc:
        end

        #
        # Class representing a cross-reference subsection.
        # A subsection contains a contiguous set of XRef.
        #
        class Subsection
            include Enumerable

            # Subsection header: first object number and number of entries.
            @@regexp = Regexp.new("(?<start>\\d+) (?<size>\\d+)" + WHITESPACES + "(\\r?\\n|\\r\\n?)")

            attr_reader :range

            #
            # Creates a new XRef subsection.
            # _start_:: The number of the first object referenced in the subsection.
            # _entries_:: An array of XRef.
            #
            def initialize(start, entries = [])
                @entries = entries.dup
                @range = Range.new(start, start + entries.size - 1)
            end

            #
            # Parses a subsection header followed by its entries.
            # Raises InvalidXRefSubsectionError if the header is malformed.
            #
            def self.parse(stream) #:nodoc:
                if stream.scan(@@regexp).nil?
                    raise InvalidXRefSubsectionError, "Bad subsection format"
                end

                start = stream['start'].to_i
                size = stream['size'].to_i

                xrefs = []
                size.times do
                    xrefs << XRef.parse(stream)
                end

                XRef::Subsection.new(start, xrefs)
            end

            #
            # Returns whether this subsection contains information about a particular object.
            # _no_:: The Object number.
            #
            def has_object?(no)
                @range.include?(no)
            end

            #
            # Returns XRef associated with a given object, or nil if the object
            # number is outside of this subsection.
            # _no_:: The Object number.
            #
            def [](no)
                # Without this guard a number below the start of the range
                # would become a negative index and select an unrelated entry.
                return nil unless has_object?(no)

                @entries[no - @range.begin]
            end

            #
            # Processes each XRef in the subsection.
            #
            def each(&b)
                @entries.each(&b)
            end

            #
            # Processes each XRef in the subsection, passing the XRef and the object number to the block.
            #
            def each_with_number
                return enum_for(__method__) { self.size } unless block_given?

                first = @range.begin
                @entries.each_with_index do |entry, index|
                    yield(entry, first + index)
                end
            end

            #
            # The number of entries in the subsection.
            #
            def size
                @entries.size
            end

            #
            # Outputs self into PDF code.
            #
            def to_s
                section = "#{@range.begin} #{@range.end - @range.begin + 1}" + EOL
                @entries.each do |xref|
                    section << xref.to_s
                end

                section
            end
        end

        class InvalidXRefSectionError < Error #:nodoc:
        end

        #
        # Class representing a Cross-reference table.
        # A section contains a set of XRefSubsection.
        #
        class Section
            include Enumerable

            TOKEN = "xref"

            @@regexp_open = Regexp.new(WHITESPACES + TOKEN + WHITESPACES + "(\\r?\\n|\\r\\n?)")
            @@regexp_sub = Regexp.new("(\\d+) (\\d+)" + WHITESPACES + "(\\r?\\n|\\r\\n?)")

            #
            # Creates a new XRef section.
            # _subsections_:: An array of XRefSubsection.
            #
            def initialize(subsections = [])
                @subsections = subsections
            end

            #
            # Parses the "xref" token and every subsection that follows it.
            # Raises InvalidXRefSectionError if the token is missing.
            #
            def self.parse(stream) #:nodoc:
                if stream.skip(@@regexp_open).nil?
                    raise InvalidXRefSectionError, "No xref token found"
                end

                subsections = []
                while stream.match?(@@regexp_sub) do
                    subsections << XRef::Subsection.parse(stream)
                end

                XRef::Section.new(subsections)
            end

            #
            # Appends a new subsection.
            # _subsection_:: A XRefSubsection.
            #
            def <<(subsection)
                @subsections << subsection
            end

            #
            # Returns a XRef associated with a given object.
            # _no_:: The Object number.
            #
            def [](no)
                @subsections.each do |s|
                    return s[no] if s.has_object?(no)
                end

                nil
            end
            alias find []

            #
            # Processes each XRef in each Subsection.
            #
            def each(&b)
                return enum_for(__method__) { self.size } unless block_given?

                @subsections.each do |subsection|
                    subsection.each(&b)
                end
            end

            #
            # Processes each XRef in each Subsection, passing the XRef and the object number.
            #
            def each_with_number(&b)
                return enum_for(__method__) { self.size } unless block_given?

                @subsections.each do |subsection|
                    subsection.each_with_number(&b)
                end
            end

            #
            # Processes each Subsection in this table.
            #
            def each_subsection(&b)
                @subsections.each(&b)
            end

            #
            # Returns an Array of Subsection.
            #
            def subsections
                @subsections
            end

            #
            # Clear all the entries.
            #
            def clear
                @subsections.clear
            end

            #
            # The number of XRef entries in the Section.
            #
            def size
                @subsections.reduce(0) { |total, subsection| total + subsection.size }
            end

            #
            # Outputs self into PDF code.
            #
            def to_s
                TOKEN + EOL + @subsections.join
            end
        end
    end

    #
    # An xref pointing to an Object embedded in an ObjectStream.
    #
    class XRefToCompressedObject
        attr_accessor :objstmno, :index

        #
        # _objstmno_:: The object number of the ObjectStream.
        # _index_:: The index of the Object inside the ObjectStream.
        #
        def initialize(objstmno, index)
            @objstmno = objstmno
            @index = index
        end

        #
        # Encodes self as a cross-reference stream entry (type 2).
        # Widths are given in bytes; every field is written as a big-endian,
        # zero-padded bit string.
        #
        def to_xrefstm_data(type_w, field1_w, field2_w)
            # Convert byte widths to bit widths.
            type_w <<= 3
            field1_w <<= 3
            field2_w <<= 3

            # Right-justify the type so that it is correct for any field width.
            type = 2.to_s(2).rjust(type_w, '0')
            objstmno = @objstmno.to_s(2).rjust(field1_w, '0')
            index = @index.to_s(2).rjust(field2_w, '0')

            [ type, objstmno, index ].pack("B#{type_w}B#{field1_w}B#{field2_w}")
        end

        def used?; true end
        def free?; false end
    end

    class InvalidXRefStreamObjectError < InvalidStreamObjectError ; end

    #
    # Class representing a XRef Stream.
    #
    class XRefStream < Stream
        include Enumerable
        include StandardObject

        XREF_FREE = 0
        XREF_USED = 1
        XREF_COMPRESSED = 2

        #
        # Xref fields
        #
        field   :Type,          :Type => Name, :Default => :XRef, :Required => true, :Version => "1.5"
        field   :Size,          :Type => Integer, :Required => true
        field   :Index,         :Type => Array.of(Integer, Integer)
        field   :Prev,          :Type => Integer
        field   :W,             :Type => Array.of(Integer, length: 3), :Required => true

        #
        # Trailer fields
        #
        field   :Root,          :Type => Catalog, :Required => true
        field   :Encrypt,       :Type => Encryption::Standard::Dictionary
        field   :Info,          :Type => Metadata
        field   :ID,            :Type => Array.of(String, length: 2)

        def initialize(data = "", dictionary = {})
            super(data, dictionary)

            # Entries are decoded lazily by load!.
            @xrefs = nil
        end

        #
        # Returns XRef entries present in this stream.
        #
        def entries
            load! if @xrefs.nil?

            @xrefs
        end

        #
        # Sets a default W field if missing, updates Size and re-encodes the
        # entries before the stream is built.
        #
        def pre_build #:nodoc:
            load! if @xrefs.nil?

            self.W = [ 1, 2, 2 ] unless self.key?(:W)
            self.Size = @xrefs.length + 1

            save!

            super
        end

        #
        # Adds an XRef to this Stream.
        #
        def <<(xref)
            load! if @xrefs.nil?

            @xrefs << xref
        end

        #
        # Iterates over each XRef present in the stream.
        #
        def each(&b)
            load! if @xrefs.nil?

            @xrefs.each(&b)
        end

        #
        # Iterates over each XRef present in the stream, passing the XRef and its object number.
        # Raises InvalidXRefStreamObjectError if the ranges cover more objects
        # than there are entries.
        #
        def each_with_number
            return enum_for(__method__) unless block_given?

            load! if @xrefs.nil?

            ranges = object_ranges
            xrefs = @xrefs.to_enum

            ranges.each do |range|
                range.each do |no|
                    begin
                        yield(xrefs.next, no)
                    rescue StopIteration
                        raise InvalidXRefStreamObjectError, "Range is bigger than number of entries"
                    end
                end
            end
        end

        #
        # Returns an XRef matching this object number.
        #
        def find(no)
            load! if @xrefs.nil?

            ranges = object_ranges

            # index is the position in @xrefs of the first entry of the
            # range currently examined.
            index = 0
            ranges.each do |range|
                return @xrefs[index + no - range.begin] if range.cover?(no)

                index += range.size
            end

            nil
        end

        #
        # Removes every entry and resets the stream data and Index field.
        #
        def clear
            self.data = ''
            @xrefs = []
            self.Index = []
        end

        private

        #
        # Returns the object number ranges described by the Index field,
        # or a single range starting at 0 when there is no Index.
        #
        def object_ranges
            load! if @xrefs.nil?

            if self.key?(:Index)
                ranges = self.Index
                unless ranges.is_a?(Array) and ranges.length.even? and ranges.all?{|i| i.is_a?(Integer)}
                    raise InvalidXRefStreamObjectError, "Index must be an even Array of integers"
                end

                ranges.each_slice(2).map { |start, length| Range.new(start.to_i, start.to_i + length.to_i - 1) }
            else
                [ 0...@xrefs.size ]
            end
        end

        #
        # Decodes the stream data into XRef / XRefToCompressedObject entries.
        # Without a W field the entry list is simply reset to empty.
        #
        def load! #:nodoc:
            if @xrefs.nil? and self.key?(:W)
                decode!

                widths = field_widths
                type_w, field1_w, field2_w = widths

                entry_size = type_w + field1_w + field2_w
                if entry_size.zero?
                    # Entries of zero bytes cannot be counted; report an invalid
                    # object instead of failing with a ZeroDivisionError below.
                    raise InvalidXRefStreamObjectError, "Invalid W field: #{widths}"
                end

                entrymask = "B#{type_w << 3}B#{field1_w << 3}B#{field2_w << 3}"
                size = @data.size / entry_size

                xentries = @data.unpack(entrymask * size).map!{|field| field.to_i(2) }

                @xrefs = []
                xentries.each_slice(3) do |type, field1, field2|
                    # A zero-width type field means the type is absent, in
                    # which case the PDF specification defaults it to 1.
                    type = XREF_USED if type_w.zero?

                    # NOTE: entries with an unknown type are skipped.
                    case type
                    when XREF_FREE
                        @xrefs << XRef.new(field1, field2, XRef::FREE)
                    when XREF_USED
                        @xrefs << XRef.new(field1, field2, XRef::USED)
                    when XREF_COMPRESSED
                        @xrefs << XRefToCompressedObject.new(field1, field2)
                    end
                end
            else
                @xrefs = []
            end
        end

        #
        # Serializes the entries into the stream data using the W field widths.
        #
        def save! #:nodoc:
            self.data = ""

            type_w, field1_w, field2_w = self.W
            @xrefs.each do |xref|
                @data << xref.to_xrefstm_data(type_w, field1_w, field2_w)
            end

            encode!
        end

        #
        # Check and return the internal field widths.
        # Raises InvalidXRefStreamObjectError unless W holds three
        # non-negative integers.
        #
        def field_widths
            widths = self.W

            unless widths.is_a?(Array) and widths.length == 3 and widths.all? {|w| w.is_a?(Integer) and w >= 0 }
                raise InvalidXRefStreamObjectError, "Invalid W field: #{widths}"
            end

            widths
        end
    end

end