# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2019 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see .
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at .
#++

require 'hexapdf/error'
require 'hexapdf/content/graphics_state'
require 'hexapdf/utils/bit_stream'

module HexaPDF
  module ImageLoader

    # This class is used for loading images in the PNG format from files or IO streams.
    #
    # It can handle all five types of PNG images: greyscale w/wo alpha, truecolor w/wo alpha and
    # indexed-color. Furthermore, it recognizes the gAMA, cHRM, sRGB and tRNS chunks and handles
    # them appropriately. However, Adam7 interlaced images are not supported!
    #
    # Note that greyscale, truecolor and indexed-color images with alpha need to be decoded to get
    # the alpha channel which takes time.
    #
    # All PNG specification section references are in reference to http://www.w3.org/TR/PNG/.
    #
    # See: PDF1.7 s7.4.4., s8.9
    class PNG

      # The magic marker that tells us if the file/IO contains an image in PNG format.
      #
      # See: PNG s5.2
      MAGIC_FILE_MARKER = "\x89PNG\r\n\x1A\n".b

      # The color type for PNG greyscale images without alpha, see PNG s11.2.2
      GREYSCALE = 0

      # The color type for PNG truecolor images without alpha, see PNG s11.2.2
      TRUECOLOR = 2

      # The color type for PNG indexed images with/without alpha, see PNG s11.2.2
      INDEXED = 3

      # The color type for PNG greyscale images with alpha, see PNG s11.2.2
      GREYSCALE_ALPHA = 4

      # The color type for PNG truecolor images with alpha, see PNG s11.2.2
      TRUECOLOR_ALPHA = 6

      # Mapping from sRGB chunk rendering intent byte to PDF rendering intent name.
      RENDERING_INTENT_MAP = {
        0 => Content::RenderingIntent::PERCEPTUAL,
        1 => Content::RenderingIntent::RELATIVE_COLORIMETRIC,
        2 => Content::RenderingIntent::SATURATION,
        3 => Content::RenderingIntent::ABSOLUTE_COLORIMETRIC,
      }.freeze

      # The primary chromaticities and white point used by the sRGB specification.
      #
      # The values are ordered like the arguments of #calrgb_definition_from_chrm: white point
      # x/y followed by the x/y pairs of the red, green and blue primaries.
      SRGB_CHRM = [0.3127, 0.329, 0.64, 0.33, 0.3, 0.6, 0.15, 0.06].freeze

      # :call-seq:
      #   PNG.handles?(filename)     -> true or false
      #   PNG.handles?(io)           -> true or false
      #
      # Returns +true+ if the given file or IO stream can be handled, ie. if it contains an image
      # in PNG format.
      #
      # A String argument is treated as a file name. Any other argument is treated as an IO
      # object which is rewound before its first eight bytes are compared to the PNG marker.
      def self.handles?(file_or_io)
        if file_or_io.kind_of?(String)
          File.read(file_or_io, 8, mode: 'rb') == MAGIC_FILE_MARKER
        else
          file_or_io.rewind
          file_or_io.read(8) == MAGIC_FILE_MARKER
        end
      end

      # :call-seq:
      #   PNG.load(document, filename)    -> image_obj
      #   PNG.load(document, io)          -> image_obj
      #
      # Creates a PDF image object from the PNG file or IO stream.
      def self.load(document, file_or_io)
        new(document, file_or_io).load
      end

      def initialize(document, io) #:nodoc:
        @document = document
        @io = io
        @color_type = nil
        @intent = nil
        @chrm = nil
        @gamma = nil
      end

      # Reads the PNG chunks up to the first IDAT chunk, builds the image dictionary from them and
      # adds the resulting image object (plus a soft mask image if alpha data is present) to the
      # document.
      #
      # Raises HexaPDF::Error if the compression, filter or interlace method of the PNG is not
      # supported or if the data ends before an IDAT chunk is found.
      def load #:nodoc:
        with_io do |io|
          io.seek(8, IO::SEEK_SET) # skip the magic file marker

          dict = {
            Type: :XObject,
            Subtype: :Image,
          }

          # Note: +idat_offset+ and +trns+ are assigned inside the loop and used after it, so a
          # plain +while+ (which doesn't open a new variable scope) is used on purpose.
          while true
            header = io.read(8)
            unless header && header.bytesize == 8
              raise HexaPDF::Error, "Invalid PNG file: no IDAT chunk found"
            end
            length, type = header.unpack('Na4') # PNG s5.3

            case type
            when 'IDAT' # PNG s11.2.4
              # Remember where the chunk header of the first IDAT chunk starts.
              idat_offset = io.pos - 8
              break
            when 'IHDR' # PNG s11.2.2
              values = io.read(length).unpack('NNC5')
              dict[:Width] = values[0]
              dict[:Height] = values[1]
              dict[:BitsPerComponent] = values[2]
              @color_type = values[3]
              if values[4] != 0
                raise HexaPDF::Error, "Unsupported PNG compression method"
              elsif values[5] != 0
                raise HexaPDF::Error, "Unsupported PNG filter method"
              elsif values[6] != 0
                raise HexaPDF::Error, "Unsupported PNG interlace method"
              end
            when 'PLTE' # PNG s11.2.3
              if @color_type == INDEXED
                palette = io.read(length)
                hival = (palette.size / 3) - 1
                if dict[:BitsPerComponent] == 8
                  palette = @document.add({Filter: :FlateDecode}, stream: palette)
                end
                dict[:ColorSpace] = [:Indexed, color_space, hival, palette]
              else
                io.seek(length, IO::SEEK_CUR)
              end
            when 'tRNS' # PNG s11.3.2
              if @color_type == INDEXED
                # One alpha value per palette entry, used later for creating the soft mask.
                trns = io.read(length).unpack('C*')
              elsif @color_type == TRUECOLOR || @color_type == GREYSCALE
                # Every sample value is doubled so that it forms a [min, max] pair.
                dict[:Mask] = io.read(length).unpack('n*').flat_map {|val| [val, val] }
              else
                io.seek(length, IO::SEEK_CUR)
              end
            when 'sRGB' # PNG s11.3.3.5
              @intent = io.read(length).unpack1('C')
              dict[:Intent] = RENDERING_INTENT_MAP[@intent]
              @chrm = SRGB_CHRM
              @gamma = 2.2
            when 'gAMA' # PNG s11.3.3.2
              gamma = 100_000.0 / io.read(length).unpack1('N')
              unless @intent || gamma == 1.0 # sRGB trumps gAMA
                @gamma = gamma
                @chrm ||= SRGB_CHRM # don't overwrite data from a cHRM chunk
              end
            when 'cHRM' # PNG s11.3.3.1
              chrm = io.read(length)
              @chrm = chrm.unpack('N8').map {|v| v / 100_000.0 } unless @intent # sRGB trumps cHRM
            else
              io.seek(length, IO::SEEK_CUR)
            end

            io.seek(4, IO::SEEK_CUR) # don't check the CRC
          end

          dict[:ColorSpace] ||= color_space

          decode_parms = {
            Predictor: 15,
            Colors: @color_type == TRUECOLOR || @color_type == TRUECOLOR_ALPHA ? 3 : 1,
            BitsPerComponent: dict[:BitsPerComponent],
            Columns: dict[:Width],
          }

          if @color_type == TRUECOLOR_ALPHA || @color_type == GREYSCALE_ALPHA
            # The alpha samples are interleaved with the color samples and need to be split off.
            image_data, mask_data = separate_alpha_channel(idat_offset, decode_parms)
            add_smask_image(dict, mask_data)
            stream = HexaPDF::StreamData.new(lambda { image_data },
                                             filter: :FlateDecode, decode_parms: decode_parms)
          else
            if @color_type == INDEXED && trns
              mask_data = alpha_mask_for_indexed_image(idat_offset, decode_parms, trns)
              add_smask_image(dict, mask_data, from_indexed: true)
            end
            stream = HexaPDF::StreamData.new(image_data_proc(idat_offset),
                                             filter: :FlateDecode, decode_parms: decode_parms)
          end

          obj = @document.add(dict, stream: stream)
          obj.set_filter(:FlateDecode, decode_parms)
          obj
        end
      end

      private

      # Yields the IO object for reading the PNG image.
      #
      # Automatically handles files and IO streams: If the source is a file name, the file is
      # opened in binary mode and closed again afterwards. An IO object provided by the caller is
      # yielded as is and never closed.
      def with_io
        io = (@io.kind_of?(String) ? File.new(@io, 'rb') : @io)
        yield(io)
      ensure
        # +io+ is nil if opening the file failed; don't mask that error with a NoMethodError.
        io.close if io && @io.kind_of?(String)
      end

      # Returns the PDF color space definition that should be used with the PDF image of the PNG
      # file.
      #
      # In the case of an indexed PNG image, this returns the definition for the color space
      # underlying the palette.
      def color_space
        if @color_type == GREYSCALE || @color_type == GREYSCALE_ALPHA
          if @gamma
            [:CalGray, {WhitePoint: [1.0, 1.0, 1.0], Gamma: @gamma}]
          else
            :DeviceGray
          end
        elsif @gamma || @chrm
          dict = @chrm ? calrgb_definition_from_chrm(*@chrm) : {}
          if @gamma
            dict[:Gamma] = [@gamma, @gamma, @gamma]
            dict[:WhitePoint] ||= [1.0, 1.0, 1.0]
          end
          [:CalRGB, dict]
        else
          :DeviceRGB
        end
      end

      # Returns a hash for a CalRGB color space definition using the x,y chromaticity coordinates
      # of the white point and the red, green and blue primaries.
      #
      # The returned hash contains the keys +:WhitePoint+ and +:Matrix+.
      #
      # See: PDF1.7 s8.6.5.3
      def calrgb_definition_from_chrm(xw, yw, xr, yr, xg, yg, xb, yb)
        z = yw * ((xg - xb) * yr - (xr - xb) * yg + (xr - xg) * yb)

        mya = yr * ((xg - xb) * yw - (xw - xb) * yg + (xw - xg) * yb) / z
        mxa = mya * xr / yr
        mza = mya * ((1 - xr) / yr - 1)

        myb = - (yg * ((xr - xb) * yw - (xw - xb) * yr + (xw - xr) * yb)) / z
        mxb = myb * xg / yg
        mzb = myb * ((1 - xg) / yg - 1)

        myc = yb * ((xr - xg) * yw - (xw - xg) * yr + (xw - xr) * yg) / z
        mxc = myc * xb / yb
        mzc = myc * ((1 - xb) / yb - 1)

        mxw = mxa + mxb + mxc
        myw = 1.0 # mya + myb + myc
        mzw = mza + mzb + mzc

        {WhitePoint: [mxw, myw, mzw], Matrix: [mxa, mya, mza, mxb, myb, mzb, mxc, myc, mzc]}
      end

      # Adds a soft mask image (the +:SMask+ entry) to the image described by +dict+ using
      # +mask_data+ as the source data.
      #
      # If the optional argument +from_indexed+ is +true+, it is assumed that the +mask_data+ was
      # created from an indexed PNG and is not deflate encoded. In this case the mask always uses
      # 8 bits per component.
      def add_smask_image(dict, mask_data, from_indexed: false)
        decode_parms = {
          Predictor: 15,
          Colors: 1,
          BitsPerComponent: (from_indexed ? 8 : dict[:BitsPerComponent]),
          Columns: dict[:Width],
        }

        # Raw mask data must not be declared as already encoded.
        stream_opts = (from_indexed ? {} : {filter: :FlateDecode, decode_parms: decode_parms})
        stream = HexaPDF::StreamData.new(lambda { mask_data }, **stream_opts)

        smask_dict = {
          Type: :XObject,
          Subtype: :Image,
          Width: dict[:Width],
          Height: dict[:Height],
          ColorSpace: :DeviceGray,
          BitsPerComponent: (from_indexed ? 8 : dict[:BitsPerComponent]),
        }
        smask = @document.add(smask_dict, stream: stream)
        smask.set_filter(:FlateDecode, decode_parms)
        dict[:SMask] = smask
      end

      # Returns a Proc object that can be used with a StreamData object to read the image data.
      #
      # The proc yields (via Fiber.yield) the contents of all consecutive IDAT chunks starting at
      # +offset+, in pieces of at most 'io.chunk_size' bytes, and stops at the first chunk that is
      # not an IDAT chunk or at the end of the data.
      #
      # This method is efficient because it doesn't need to uncompress or filter the image data
      # but it only works for PNG images without embedded alpha channel data.
      def image_data_proc(offset)
        lambda do
          with_io do |io|
            io.seek(offset, IO::SEEK_SET)

            while true
              header = io.read(8)
              # Stop cleanly if the data ends right after a chunk, e.g. if the IEND chunk is
              # missing.
              break if header.nil?
              length, type = header.unpack('Na4') # PNG s5.3
              break if type != 'IDAT'

              chunk_size = @document.config['io.chunk_size']
              while length > 0
                chunk_size = length if chunk_size > length
                Fiber.yield(io.read(chunk_size))
                length -= chunk_size
              end
              io.seek(4, IO::SEEK_CUR) # don't check the CRC
            end
          end
          nil
        end
      end

      # Separates the color data from the alpha data and returns an array containing the image and
      # alpha data, both deflate encoded with predictor.
      #
      # Since we need to decompress the PNG chunks and extract the color/alpha bytes this method
      # is not very fast but gets the job done as fast as possible in plain Ruby.
      def separate_alpha_channel(offset, decode_parms)
        bytes_per_colors = (decode_parms[:BitsPerComponent] * decode_parms[:Colors] + 7) / 8
        bytes_per_alpha = (decode_parms[:BitsPerComponent] + 7) / 8
        # Size of one decompressed scanline including the leading filter type byte.
        bytes_per_row = (decode_parms[:Columns] * decode_parms[:BitsPerComponent] *
          (decode_parms[:Colors] + 1) + 7) / 8 + 1
        image_data = ''.b
        mask_data = ''.b

        flate_decode = @document.config.constantize('filter.map', :FlateDecode)
        source = flate_decode.decoder(Fiber.new(&image_data_proc(offset)))

        data = ''.b
        while source.alive? && (new_data = source.resume)
          data << new_data
          # Only work on complete scanlines, partial ones are kept until more data arrives.
          while data.length >= bytes_per_row
            i = 1
            # The filter type byte applies to both the color and the alpha scanline.
            image_data << data.getbyte(0)
            mask_data << data.getbyte(0)
            while i < bytes_per_row
              bytes_per_colors.times {|j| image_data << data.getbyte(i + j) }
              i += bytes_per_colors
              bytes_per_alpha.times {|j| mask_data << data.getbyte(i + j) }
              i += bytes_per_alpha
            end
            data = data[bytes_per_row..-1]
          end
        end

        image_data = Filter.string_from_source(flate_decode.encoder(Fiber.new { image_data }))
        mask_data = Filter.string_from_source(flate_decode.encoder(Fiber.new { mask_data }))

        [image_data, mask_data]
      end

      # Creates the alpha mask source data for an indexed PNG with alpha values.
      #
      # Each palette index of the image is replaced by its alpha value from +trns+; indices
      # without an entry in +trns+ are treated as fully opaque (255). The result contains one byte
      # per pixel and no filter type bytes.
      #
      # The returned data is *not* deflate encoded!
      def alpha_mask_for_indexed_image(offset, decode_parms, trns)
        width = decode_parms[:Columns]
        bpc = decode_parms[:BitsPerComponent]
        bytes_per_row = (width * bpc + 7) / 8 + 1
        # Number of unused bits that pad a scanline to a byte boundary. This is zero if the
        # scanline already ends on a byte boundary, in which case nothing must be skipped.
        fill_bits = (8 - (width * bpc) % 8) % 8

        flate_decode = @document.config.constantize('filter.map', :FlateDecode)
        source = flate_decode.decoder(Fiber.new(&image_data_proc(offset)))

        mask_data = ''.b
        stream = HexaPDF::Utils::BitStreamReader.new

        while source.alive? && (data = source.resume)
          stream.append_data(data)
          while stream.remaining_bits / 8 >= bytes_per_row
            # NOTE(review): the filter type byte is discarded, i.e. the indices are used as
            # stored - this appears to assume filter type 0 (None) for every scanline; confirm.
            stream.read(8) # read filter byte
            i = 0
            while i < width
              index = stream.read(bpc)
              mask_data << (trns[index] || 255)
              i += 1
            end
            stream.read(fill_bits) if fill_bits > 0 # read remaining fill bits
          end
        end

        mask_data
      end

    end

  end
end