# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2019 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see .
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at .
#++

require 'hexapdf/error'
require 'hexapdf/dictionary'
require 'hexapdf/stream'
require 'hexapdf/type/page_tree_node'
require 'hexapdf/content'
require 'hexapdf/content/transformation_matrix'

module HexaPDF
  module Type

    # Represents a page of a PDF document.
    #
    # A page object contains the meta information for a page. Most of the fields are independent
    # from the page's content like the /Dur field. However, some of them (like /Resources or
    # /UserUnit) influence how or if the page's content can be rendered correctly.
    #
    # A number of field values can also be inherited: /Resources, /MediaBox, /CropBox, /Rotate.
    # Field inheritance means that if a field is not set on the page object itself, the value is
    # taken from the nearest page tree ancestor that has this value set.
    #
    # See: PDF1.7 s7.7.3.3, s7.7.3.4, Pages
    class Page < Dictionary

      # The predefined paper sizes in points (1/72 inch):
      #
      # * ISO sizes: A0x4, A0x2, A0-A10, B0-B10, C0-C10
      # * Letter, Legal, Ledger, Tabloid, Executive
      PAPER_SIZE = {
        A0x4: [0, 0, 4768, 6741].freeze,
        A0x2: [0, 0, 3370, 4768].freeze,
        A0: [0, 0, 2384, 3370].freeze,
        A1: [0, 0, 1684, 2384].freeze,
        A2: [0, 0, 1191, 1684].freeze,
        A3: [0, 0, 842, 1191].freeze,
        A4: [0, 0, 595, 842].freeze,
        A5: [0, 0, 420, 595].freeze,
        A6: [0, 0, 298, 420].freeze,
        A7: [0, 0, 210, 298].freeze,
        A8: [0, 0, 147, 210].freeze,
        A9: [0, 0, 105, 147].freeze,
        A10: [0, 0, 74, 105].freeze,
        B0: [0, 0, 2835, 4008].freeze,
        B1: [0, 0, 2004, 2835].freeze,
        B2: [0, 0, 1417, 2004].freeze,
        B3: [0, 0, 1001, 1417].freeze,
        B4: [0, 0, 709, 1001].freeze,
        B5: [0, 0, 499, 709].freeze,
        B6: [0, 0, 354, 499].freeze,
        B7: [0, 0, 249, 354].freeze,
        B8: [0, 0, 176, 249].freeze,
        B9: [0, 0, 125, 176].freeze,
        B10: [0, 0, 88, 125].freeze,
        C0: [0, 0, 2599, 3677].freeze,
        C1: [0, 0, 1837, 2599].freeze,
        C2: [0, 0, 1298, 1837].freeze,
        C3: [0, 0, 918, 1298].freeze,
        C4: [0, 0, 649, 918].freeze,
        C5: [0, 0, 459, 649].freeze,
        C6: [0, 0, 323, 459].freeze,
        C7: [0, 0, 230, 323].freeze,
        C8: [0, 0, 162, 230].freeze,
        C9: [0, 0, 113, 162].freeze,
        C10: [0, 0, 79, 113].freeze,
        Letter: [0, 0, 612, 792].freeze,
        Legal: [0, 0, 612, 1008].freeze,
        Ledger: [0, 0, 792, 1224].freeze,
        Tabloid: [0, 0, 1224, 792].freeze,
        Executive: [0, 0, 522, 756].freeze,
      }.freeze

      # Returns the media box for the given paper size. See PAPER_SIZE for the defined paper sizes.
      #
      # The returned array is a fresh, unfrozen copy. If +orientation+ is :landscape, the width
      # and height entries are swapped; any other value leaves the portrait layout untouched.
      #
      # Raises a HexaPDF::Error if +paper_size+ is not a key of PAPER_SIZE.
      def self.media_box(paper_size, orientation: :portrait)
        unless PAPER_SIZE.key?(paper_size)
          raise HexaPDF::Error, "Invalid paper size specified: #{paper_size}"
        end

        media_box = PAPER_SIZE[paper_size].dup
        media_box[2], media_box[3] = media_box[3], media_box[2] if orientation == :landscape
        media_box
      end

      # The inheritable fields.
      INHERITABLE_FIELDS = [:Resources, :MediaBox, :CropBox, :Rotate].freeze

      # The required inheritable fields.
      REQUIRED_INHERITABLE_FIELDS = [:Resources, :MediaBox].freeze

      define_type :Page

      define_field :Type,                 type: Symbol, required: true, default: type
      define_field :Parent,               type: :Pages, required: true, indirect: true
      define_field :LastModified,         type: PDFDate, version: '1.3'
      define_field :Resources,            type: :XXResources
      define_field :MediaBox,             type: Rectangle
      define_field :CropBox,              type: Rectangle
      define_field :BleedBox,             type: Rectangle, version: '1.3'
      define_field :TrimBox,              type: Rectangle, version: '1.3'
      define_field :ArtBox,               type: Rectangle, version: '1.3'
      define_field :BoxColorInfo,         type: Dictionary, version: '1.4'
      define_field :Contents,             type: [Stream, PDFArray]
      define_field :Rotate,               type: Integer, default: 0
      define_field :Group,                type: Dictionary, version: '1.4'
      define_field :Thumb,                type: Stream
      define_field :B,                    type: PDFArray, version: '1.1'
      define_field :Dur,                  type: Numeric, version: '1.1'
      define_field :Trans,                type: Dictionary, version: '1.1'
      define_field :Annots,               type: PDFArray
      define_field :AA,                   type: Dictionary, version: '1.2'
      define_field :Metadata,             type: Stream, version: '1.4'
      define_field :PieceInfo,            type: Dictionary, version: '1.3'
      define_field :StructParents,        type: Integer, version: '1.3'
      define_field :ID,                   type: PDFByteString, version: '1.3'
      define_field :PZ,                   type: Numeric, version: '1.3'
      define_field :SeparationInfo,       type: Dictionary, version: '1.3'
      define_field :Tabs,                 type: Symbol, version: '1.5'
      define_field :TemplateInstantiated, type: Symbol, version: '1.5'
      define_field :PresSteps,            type: Dictionary, version: '1.5'
      define_field :UserUnit,             type: Numeric, version: '1.6'
      define_field :VP,                   type: PDFArray, version: '1.6'

      # Returns +true+ since page objects must always be indirect.
      def must_be_indirect?
        true
      end

      # Returns the value for the entry +name+.
      #
      # If +name+ is an inheritable value and the value has not been set on the page object, its
      # value is retrieved from the ancestor page tree nodes.
      #
      # See: Dictionary#[]
      def [](name)
        if value[name].nil? && INHERITABLE_FIELDS.include?(name)
          # Walk up the /Parent chain until a node defines the value or the root is reached.
          node = self
          node = node[:Parent] while node.value[name].nil? && node[:Parent]
          # Fall back to the normal lookup (e.g. field defaults) if no ancestor has the value.
          node == self || node.value[name].nil? ? super : node[name]
        else
          super
        end
      end

      # Copies the page's inherited values from the ancestor page tree nodes into a hash and returns
      # the hash.
      #
      # Only inheritable fields that are not set directly on the page end up in the hash.
      #
      # The hash can then be used to update the page itself (e.g. when moving a page from one
      # position to another) or another page (e.g. when importing a page from another document).
      def copy_inherited_values
        INHERITABLE_FIELDS.each_with_object({}) do |name, hash|
          hash[name] = HexaPDF::Object.deep_copy(self[name]) if value[name].nil?
        end
      end

      # :call-seq:
      #   page.box(type = :media)              -> box
      #   page.box(type = :media, rectangle)   -> rectangle
      #
      # If no +rectangle+ is given, returns the rectangle defining a certain kind of box for the
      # page. Otherwise sets the value for the given box type to +rectangle+ (an array with four
      # values or a HexaPDF::Rectangle).
      #
      # This method should be used instead of directly accessing any of /MediaBox, /CropBox,
      # /BleedBox, /ArtBox or /TrimBox because it also takes the fallback values into account!
      #
      # The following types are allowed:
      #
      # :media::
      #     The media box defines the boundaries of the medium the page is to be printed on.
      #
      # :crop::
      #     The crop box defines the region to which the contents of the page should be clipped
      #     when it is displayed or printed. The default is the media box.
      #
      # :bleed::
      #     The bleed box defines the region to which the contents of the page should be clipped
      #     when output in a production environment. The default is the crop box.
      #
      # :trim::
      #     The trim box defines the intended dimensions of the page after trimming. The default
      #     value is the crop box.
      #
      # :art::
      #     The art box defines the region of the page's meaningful content as intended by the
      #     author. The default is the crop box.
      #
      # Raises an ArgumentError for any other +type+.
      #
      # See: PDF1.7 s14.11.2
      def box(type = :media, rectangle = nil)
        if rectangle
          case type
          when :media, :crop, :bleed, :trim, :art
            self["#{type.capitalize}Box".to_sym] = rectangle
          else
            raise ArgumentError, "Unsupported page box type provided: #{type}"
          end
        else
          case type
          when :media then self[:MediaBox]
          when :crop then self[:CropBox] || self[:MediaBox]
          when :bleed then self[:BleedBox] || self[:CropBox] || self[:MediaBox]
          when :trim then self[:TrimBox] || self[:CropBox] || self[:MediaBox]
          when :art then self[:ArtBox] || self[:CropBox] || self[:MediaBox]
          else
            raise ArgumentError, "Unsupported page box type provided: #{type}"
          end
        end
      end

      # Returns the orientation of the media box, either :portrait or :landscape.
      #
      # The /Rotate value is taken into account. A box that is neither taller than wide (at 0 or
      # 180 degrees) nor wider than tall (at 90 or 270 degrees), e.g. a square one, is reported as
      # :landscape.
      def orientation
        box = self[:MediaBox]
        rotation = self[:Rotate]
        if (box.height > box.width && (rotation == 0 || rotation == 180)) ||
            (box.height < box.width && (rotation == 90 || rotation == 270))
          :portrait
        else
          :landscape
        end
      end

      # Rotates the page +angle+ degrees counterclockwise where +angle+ has to be a multiple of 90.
      #
      # Positive values rotate the page to the left, negative values to the right. If +flatten+ is
      # +true+, the rotation is not done via the page's meta data but by "rotating" the canvas
      # itself. In that case every box that is set directly on the page is rotated individually
      # and the existing content streams are wrapped in a q/cm ... Q pair.
      #
      # Note that the :Rotate key of a page object describes the angle in a clockwise orientation
      # but this method uses counterclockwise rotation to be consistent with other rotation methods
      # (e.g. HexaPDF::Content::Canvas#rotate).
      #
      # Raises an ArgumentError if +angle+ is not a multiple of 90.
      def rotate(angle, flatten: false)
        if angle % 90 != 0
          raise ArgumentError, "Page rotation has to be multiple of 90 degrees"
        end

        # /Rotate (and therefore cw_angle) is expressed clockwise, +angle+ counterclockwise.
        cw_angle = (self[:Rotate] - angle) % 360

        if flatten
          delete(:Rotate)
          return if cw_angle == 0

          matrix = case cw_angle
                   when 90 then HexaPDF::Content::TransformationMatrix.new(0, -1, 1, 0)
                   when 180 then HexaPDF::Content::TransformationMatrix.new(-1, 0, 0, -1)
                   when 270 then HexaPDF::Content::TransformationMatrix.new(0, 1, -1, 0)
                   end

          [:MediaBox, :CropBox, :BleedBox, :TrimBox, :ArtBox].each do |box_name|
            next unless key?(box_name)

            # The corners have to be taken from the box that is being rotated. Using the media
            # box corners for every box would overwrite e.g. a smaller crop box with the rotated
            # media box.
            rect = self[box_name]
            llx, lly, urx, ury =
              case cw_angle
              when 90 then [rect.right, rect.bottom, rect.left, rect.top]
              when 180 then [rect.right, rect.top, rect.left, rect.bottom]
              when 270 then [rect.left, rect.top, rect.right, rect.bottom]
              end
            rect.value = matrix.evaluate(llx, lly).concat(matrix.evaluate(urx, ury))
          end

          before_contents = document.add({}, stream: " q #{matrix.to_a.join(' ')} cm ")
          after_contents = document.add({}, stream: " Q ")
          self[:Contents] = [before_contents, *self[:Contents], after_contents]
        else
          self[:Rotate] = cw_angle
        end
      end

      # Returns the concatenated stream data from the content streams as binary string.
      #
      # The data of consecutive content streams is separated by a single space.
      #
      # Note: Any modifications done to the returned value *won't* be reflected in any of the
      # streams' data!
      def contents
        Array(self[:Contents]).each_with_object("".b) do |content_stream, content|
          content << " " unless content.empty?
          content << document.deref(content_stream).stream
        end
      end

      # Replaces the contents of the page with the given string.
      #
      # This is done by deleting all but the first content stream and reusing this content stream;
      # or by creating a new one if no content stream exists.
      def contents=(data)
        first, *rest = self[:Contents]
        rest.each {|stream| document.delete(stream) }
        if first
          self[:Contents] = first
          document.deref(first).stream = data
        else
          self[:Contents] = document.add({Filter: :FlateDecode}, stream: data)
        end
      end

      # Returns the possibly inherited resource dictionary which is automatically created if it
      # doesn't exist.
      def resources
        self[:Resources] ||= document.wrap({}, type: :XXResources)
      end

      # Processes the content streams associated with the page with the given processor object.
      #
      # An empty /Resources dictionary is set on the page if none can be found, and the resource
      # dictionary is assigned to the processor before parsing.
      #
      # See: HexaPDF::Content::Processor
      def process_contents(processor)
        self[:Resources] = {} if self[:Resources].nil?
        processor.resources = self[:Resources]
        Content::Parser.parse(contents, processor)
      end

      # Returns the index of the page in the page tree.
      def index
        idx = 0
        node = self
        # Climb from the page to the root and, on every level, add the number of pages
        # contributed by the siblings that come before the current node.
        while (parent_node = node[:Parent])
          parent_node[:Kids].each do |kid|
            kid = document.deref(kid)
            break if kid.data == node.data
            idx += (kid.type == :Page ? 1 : kid[:Count])
          end
          node = parent_node
        end
        idx
      end

      # Returns the requested type of canvas for the page.
      #
      # The canvas object is cached once it is created so that its graphics state is correctly
      # retained without the need for parsing its contents.
      #
      # type::
      #    Can either be
      #    * :page for getting the canvas for the page itself (only valid for initially empty pages)
      #    * :overlay for getting the canvas for drawing over the page contents
      #    * :underlay for getting the canvas for drawing under the page contents
      #
      # Raises an ArgumentError for any other +type+ and a HexaPDF::Error if the :page canvas is
      # requested for a page that already has contents.
      def canvas(type: :page)
        unless [:page, :overlay, :underlay].include?(type)
          raise ArgumentError, "Invalid value for 'type', expected: :page, :underlay or :overlay"
        end
        cache_key = "#{type}_canvas".intern
        return document.cache(@data, cache_key) if document.cached?(@data, cache_key)

        if type == :page && key?(:Contents)
          raise HexaPDF::Error, "Cannot get the canvas for a page with contents"
        end

        # A page without any contents gets its page canvas as the one and only content stream.
        if self[:Contents].nil?
          page_canvas = document.cache(@data, :page_canvas, Content::Canvas.new(self))
          self[:Contents] = document.add({Filter: :FlateDecode},
                                         stream: page_canvas.stream_data)
        end

        if type == :overlay || type == :underlay
          # Both canvases are created together because the surrounding q/Q operators of the two
          # wrapper streams have to match up.
          underlay_canvas = document.cache(@data, :underlay_canvas, Content::Canvas.new(self))
          overlay_canvas = document.cache(@data, :overlay_canvas, Content::Canvas.new(self))

          stream = HexaPDF::StreamData.new do
            Fiber.yield(" q ")
            fiber = underlay_canvas.stream_data.fiber
            while fiber.alive? && (data = fiber.resume)
              Fiber.yield(data)
            end
            " Q q "
          end
          underlay = document.add({Filter: :FlateDecode}, stream: stream)

          stream = HexaPDF::StreamData.new do
            Fiber.yield(" Q ")
            fiber = overlay_canvas.stream_data.fiber
            while fiber.alive? && (data = fiber.resume)
              Fiber.yield(data)
            end
          end
          overlay = document.add({Filter: :FlateDecode}, stream: stream)

          self[:Contents] = [underlay, *self[:Contents], overlay]
        end

        document.cache(@data, cache_key)
      end

      # Creates a Form XObject from the page's dictionary and contents for the given PDF document.
      #
      # If +reference+ is true, the page's contents is referenced when possible to avoid unnecessary
      # decoding/encoding.
      #
      # Note 1: The created Form XObject is *not* added to the document automatically!
      #
      # Note 2: If +reference+ is false and if a canvas is used on this page (see #canvas), this
      # method should only be called once the contents of the page has been fully defined. The
      # reason is that during the copying of the content stream data the contents may be modified to
      # make it a fully valid content stream.
      def to_form_xobject(reference: true)
        first, *rest = self[:Contents]
        # The raw stream is only referenced if there is exactly one content stream and its raw
        # stream is not a String; otherwise the concatenated contents are used.
        stream = if !first
                   nil
                 elsif !reference || !rest.empty? || first.raw_stream.kind_of?(String)
                   contents
                 else
                   first.raw_stream
                 end
        dict = {
          Type: :XObject,
          Subtype: :Form,
          BBox: HexaPDF::Object.deep_copy(box(:crop)),
          Resources: HexaPDF::Object.deep_copy(self[:Resources]),
          Filter: :FlateDecode,
        }
        document.wrap(dict, stream: stream)
      end

      private

      # Ensures that the required inheritable fields are set.
      #
      # Each missing field is reported via the block; only a missing /Resources field is flagged
      # as correctable and is replaced by an empty, validated resource dictionary.
      def perform_validation(&block)
        super
        REQUIRED_INHERITABLE_FIELDS.each do |name|
          next if self[name]
          yield("Inheritable page field #{name} not set", name == :Resources)
          # Only /Resources can be auto-corrected. Resetting it while handling a missing
          # /MediaBox would throw away an existing resource dictionary.
          next unless name == :Resources
          self[:Resources] = {}
          self[:Resources].validate(&block)
        end
      end

    end

  end
end