# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2021 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
#++

require 'hexapdf/error'
require 'hexapdf/dictionary'
require 'hexapdf/filter'

module HexaPDF

  # Describes stream data that cannot simply be held as a string.
  #
  # An instance bundles everything needed to obtain the stream bytes through a Fiber object (see
  # HexaPDF::Filter). The bytes are taken from one of three kinds of sources: an IO object, a file
  # identified by its name, or a Proc object from which the Fiber is created.
  #
  # The attributes #filter and #decode_parms tell which filters still have to be applied to the
  # bytes delivered by the Fiber. Both values are normalized to arrays when the object is created
  # so that they can be processed uniformly.
  class StreamData

    # The filter(s) that need to be applied for getting the decoded stream data.
    attr_reader :filter

    # The decoding parameters associated with the +filter+(s).
    attr_reader :decode_parms

    # :call-seq:
    #   StreamData.new(io)          -> stream_data
    #   StreamData.new(str)         -> stream_data
    #   StreamData.new(proc)        -> stream_data
    #   StreamData.new { block }    -> stream_data
    #
    # Creates a new StreamData object for the given +source+ and with the given options. The
    # created object is frozen.
    #
    # The +source+ can be:
    #
    # * An IO stream which is read starting from a specific +offset+ for a specific +length+
    #
    # * A string which is interpreted as a file name and read starting from a specific +offset+
    #   and for a specific +length+
    #
    # * A Proc object (that is converted to a Fiber when needed) in which case the +offset+ value
    #   is ignored and the +length+ is handed to the created Fiber. The Proc object can also be
    #   passed by using a block.
    #
    # An ArgumentError is raised if neither a +source+ nor a block is provided.
    def initialize(source = nil, offset: nil, length: nil, filter: nil, decode_parms: nil, &block)
      if source.nil? && block.nil?
        raise ArgumentError, "Either a source object or a block must be given"
      end

      @source = source || block
      @offset = offset
      @length = length
      # A single filter and an array of filters are both accepted, +nil+ entries are dropped.
      @filter = [filter].flatten.compact
      # Not compacted: a +nil+ entry stands for "no parameters" for the filter at that position.
      @decode_parms = [decode_parms].flatten
      freeze
    end

    # Returns a Fiber for getting at the data of the stream represented by this object.
    #
    # The +chunk_size+ is only forwarded when the source is a file name or an IO object.
    def fiber(chunk_size = 0)
      return FiberWithLength.new(@length, &@source) if @source.kind_of?(Proc)

      options = {pos: @offset || 0, length: @length || -1, chunk_size: chunk_size}
      if @source.kind_of?(String)
        HexaPDF::Filter.source_from_file(@source, **options)
      else
        HexaPDF::Filter.source_from_io(@source, **options)
      end
    end

    # Returns whether this stream data object is equal to the other stream data object, i.e.
    # whether source, offset, length, filter and decode parameters are all equal.
    def ==(other)
      return false unless other.kind_of?(StreamData)

      source == other.source && offset == other.offset && length == other.length &&
        filter == other.filter && decode_parms == other.decode_parms
    end

    protected

    # The source.
    attr_reader :source

    # The optional offset into the bytes provided by source.
    attr_reader :offset

    # The optional number of bytes to use starting from offset.
    attr_reader :length

  end

  # Implements Stream objects of the PDF object system.
  #
  # == Stream Objects
  #
  # A stream may also be associated with a PDF object but only if the value is a PDF dictionary.
  # This associated dictionary further describes the stream, like its length or how it is encoded.
  #
  # Such a stream object in PDF contains string data but of possibly unlimited length. Therefore
  # it is used for large amounts of data like images, page descriptions or embedded files.
  #
  # The basic Object class cannot hold stream data, only this subclass contains the necessary
  # methods to conveniently work with the stream data!
  #
  # Note that support for external streams (/F, /FFilter, /FDecodeParms) is not yet implemented!
  #
  # See: PDF1.7 s7.3.8, Dictionary
  class Stream < Dictionary

    define_field :Length,       type: Integer # not required, will be auto-filled when writing
    define_field :Filter,       type: [Symbol, PDFArray]
    define_field :DecodeParms,  type: [Dictionary, PDFArray]
    define_field :F,            type: :Filespec, version: '1.2'
    define_field :FFilter,      type: [Symbol, PDFArray], version: '1.2'
    define_field :FDecodeParms, type: [Dictionary, PDFArray], version: '1.2'
    define_field :DL,           type: Integer

    # Stream objects must always be indirect.
    def must_be_indirect?
      true
    end

    # Assigns a new stream data object.
    #
    # The +stream+ argument can be a HexaPDF::StreamData object, a String object or +nil+.
    #
    # If +stream+ is +nil+, an empty binary string is used instead. An ArgumentError is raised for
    # any other kind of object.
    def stream=(stream)
      data.stream = stream
      after_data_change
    end

    # Returns the (possibly decoded) stream data as string.
    #
    # Note that modifications done to the returned string are not reflected in the Stream object
    # itself. The modified string must explicitly be assigned via #stream= to take effect.
    def stream
      raw = data.stream
      # Hand out a copy so that the stored string cannot be changed through the return value.
      return raw.dup if raw.kind_of?(String)

      HexaPDF::Filter.string_from_source(stream_decoder)
    end

    # Returns the raw stream object.
    #
    # The returned value can be of many different types (see #stream=). For working with the
    # decoded stream contents use #stream.
    def raw_stream
      data.stream
    end

    # Returns the Fiber representing the unprocessed content of the stream.
    def stream_source
      raw = data.stream
      if raw.kind_of?(String)
        HexaPDF::Filter.source_from_string(raw)
      else
        raw.fiber(config['io.chunk_size'])
      end
    end

    # Returns the decoder Fiber for the stream data.
    #
    # See the Filter module for more information on how to work with the fiber.
    def stream_decoder
      source = stream_source
      return source unless data.stream.kind_of?(StreamData)

      # Chain the decoders in the order in which the filters are listed.
      data.stream.filter.zip(data.stream.decode_parms).reduce(source) do |fiber, (name, parms)|
        filter_for_name(name).decoder(fiber, parms)
      end
    end

    # :call-seq:
    #   stream.stream_encoder
    #
    # Returns the encoder Fiber for the stream data.
    #
    # See the Filter module for more information on how to work with the fiber.
    def stream_encoder(source = stream_source)
      filter_names = [document.unwrap(self[:Filter])].flatten
      filter_parms = [document.unwrap(self[:DecodeParms])].flatten
      encoder_data = filter_names.zip(filter_parms).reject {|name, _| name.nil? }

      if data.stream.kind_of?(StreamData)
        decoder_data = data.stream.filter.zip(data.stream.decode_parms)

        # The last decoding step and the first encoding step cancel each other out when both use
        # the same filter with the same parameters, so such trailing pairs are dropped.
        until decoder_data.empty? || encoder_data.empty? ||
            decoder_data.last != encoder_data.last
          decoder_data.pop
          encoder_data.pop
        end

        source = decoder_data.reduce(source) do |fiber, (name, parms)|
          filter_for_name(name).decoder(fiber, parms)
        end
      end

      # Filters are stored in decoding order, encoding has to apply them back to front.
      encoder_data.reverse_each.reduce(source) do |fiber, (name, parms)|
        filter_for_name(name).encoder(fiber, parms)
      end
    end

    # Sets the filters that should be used for encoding the stream.
    #
    # The arguments +filter+ as well as +decode_parms+ can either be single items or arrays. A
    # +nil+ value or an empty array removes the respective entry; the /DecodeParms entry is also
    # removed when there is no /Filter entry.
    #
    # The filters have to be specified in the *decoding order*! For example, if the filters would
    # be [:A85, :Fl], the stream would first be encoded with the Flate and then with the ASCII85
    # filter.
    def set_filter(filter, decode_parms = nil)
      unset = ->(obj) { obj.nil? || (obj.kind_of?(Array) && obj.empty?) }

      if unset.call(filter)
        delete(:Filter)
      else
        self[:Filter] = filter
      end

      if unset.call(decode_parms) || !key?(:Filter)
        delete(:DecodeParms)
      else
        self[:DecodeParms] = decode_parms
      end
    end

    private

    # Makes sure that the stream data is either a String or a HexaPDF::StreamData object.
    #
    # A missing stream is replaced by an empty binary string, any other kind of object leads to an
    # ArgumentError.
    def after_data_change
      super
      data.stream ||= ''.b
      return if data.stream.kind_of?(StreamData) || data.stream.kind_of?(String)

      raise ArgumentError, "Object of class #{data.stream.class} cannot be used as stream value"
    end

    # Returns the filter object that corresponds to the given filter name.
    #
    # A HexaPDF::Error is raised from the fallback block given to the configuration lookup.
    #
    # See: HexaPDF::Filter
    def filter_for_name(filter_name)
      config.constantize('filter.map', filter_name) do
        raise HexaPDF::Error, "Unknown stream filter '#{filter_name}' encountered"
      end
    end

    # :nodoc:
    # A mapping from short name to long name for filters.
    FILTER_MAP = {
      AHx: :ASCIIHexDecode,
      A85: :ASCII85Decode,
      LZW: :LZWDecode,
      Fl: :FlateDecode,
      RL: :RunLengthDecode,
      CCF: :CCITTFaxDecode,
      DCT: :DCTDecode,
    }.freeze

    # Validates the /Filter entry so that it contains only long-name filter names.
    #
    # Every short name found, either as the sole value or inside an array, is reported through
    # the given block and then replaced by its long-form counterpart.
    def perform_validation
      super
      message = "A stream's /Filter entry may only use long-form filter names"
      filter = value[:Filter]

      case filter
      when Symbol
        if FILTER_MAP.key?(filter)
          yield(message, true)
          value[:Filter] = FILTER_MAP[filter]
        end
      when Array
        filter.map! do |name|
          if FILTER_MAP.key?(name)
            yield(message, true)
            FILTER_MAP[name]
          else
            name
          end
        end
      end
    end

  end

end