# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2021 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
#++
require 'digest/md5'
require 'hexapdf/font/true_type'
require 'hexapdf/font/cmap'
require 'hexapdf/font/invalid_glyph'
require 'hexapdf/error'
module HexaPDF
module Font
# This class wraps a generic TrueType font object and provides the methods needed for working
# with the font in a PDF context.
#
# TrueType fonts can be represented in two ways in PDF: As a simple font with Subtype TrueType
# or as a composite font using a Type2 CIDFont. The wrapper only supports the composite font
# case because:
#
# * By using a composite font more than 256 characters can be encoded with one font object.
# * Fonts for vertical writing can potentially be used.
# * The PDF specification recommends using a composite font (see PDF1.7 s9.9 at the end).
#
# Additionally, TrueType fonts are *always* embedded.
class TrueTypeWrapper
# A single glyph of the wrapped TrueType font.
#
# All metrics are converted from font units by multiplying them with 1000 and dividing them
# by the units-per-em value of the font's 'head' table. Each converted value is computed on
# first access and then cached.
class Glyph

  # The ID of the glyph.
  attr_reader :id

  # The string represented by the glyph.
  attr_reader :str

  # Creates a new Glyph object for the glyph with the given +id+ of the TrueType +font+,
  # using +str+ as its string representation.
  def initialize(font, id, str)
    @font = font
    @id = id
    @str = str
  end

  # Returns the minimum x coordinate of the glyph.
  def x_min
    @x_min ||= scale(glyf_entry.x_min)
  end

  # Returns the maximum x coordinate of the glyph.
  def x_max
    @x_max ||= scale(glyf_entry.x_max)
  end

  # Returns the minimum y coordinate of the glyph.
  def y_min
    @y_min ||= scale(glyf_entry.y_min)
  end

  # Returns the maximum y coordinate of the glyph.
  def y_max
    @y_max ||= scale(glyf_entry.y_max)
  end

  # Returns the width of the glyph, based on the advance width stored in the 'hmtx' table.
  def width
    @width ||= scale(@font[:hmtx][id].advance_width)
  end

  # Always returns +false+: The word spacing parameter is never applied for multibyte font
  # encodings where each glyph is encoded using two bytes.
  def apply_word_spacing?
    false
  end

  #:nodoc:
  def inspect
    "#<#{self.class.name} font=#{@font.full_name.inspect} id=#{id} #{str.inspect}>"
  end

  private

  # Returns the entry of the font's 'glyf' table for this glyph.
  def glyf_entry
    @font[:glyf][id]
  end

  # Converts +value+, given in font units, using the font's units-per-em value.
  def scale(value)
    value * 1000.0 / @font[:head].units_per_em
  end

end
# Prevent access to the Glyph constant from outside of this class.
private_constant :Glyph
# Returns the wrapped TrueType font object, i.e. the font that was passed in on creation.
attr_reader :wrapped_font
# Returns the PDF object associated with the wrapper, i.e. either the object passed in on
# creation or the one that was automatically created.
attr_reader :pdf_object
# Creates a new wrapper for using the TrueType +font+ with the PDF +document+.
#
# +pdf_object+::
#     The PDF font object that should be associated with the wrapper. If it is not provided,
#     a suitable one is created automatically for the +document+.
#
# +subset+::
#     If true, a subsetter is set up for the font so that the font is subset.
#
# Raises a HexaPDF::Error if the 'cmap' table of the font provides no preferred mapping
# table.
def initialize(document, font, pdf_object: nil, subset: true)
  @wrapped_font = font
  @subsetter = if subset
                 HexaPDF::Font::TrueType::Subsetter.new(font)
               end

  @cmap = font[:cmap].preferred_table
  if @cmap.nil?
    raise HexaPDF::Error,
          "No mapping table for Unicode characters found for TTF font #{font.full_name}"
  end

  # Caches for the glyph objects and for the already encoded glyphs.
  @id_to_glyph = {}
  @codepoint_to_glyph = {}
  @encoded_glyphs = {}

  @pdf_object = pdf_object || create_pdf_object(document)
end
# Returns the type of the font, which is always +:TrueType+ for this wrapper.
def font_type
:TrueType
end
# Returns the factor with which values in font units have to be multiplied to get PDF units.
#
# The factor is 1000 divided by the units-per-em value of the font's 'head' table. It is
# computed once and then cached.
def scaling_factor
  return @scaling_factor if @scaling_factor

  @scaling_factor = 1000.0 / @wrapped_font[:head].units_per_em
end
# Returns +true+ if the wrapped TrueType font will be subset, i.e. if a subsetter has been
# set up for it, and +false+ otherwise.
def subset?
!@subsetter.nil?
end
# Returns the glyph object for the given glyph ID.
#
# The optional argument +str+ should be the string representation of the glyph. Only provide
# it if it is known. If it is not provided, the string is derived from the Unicode mapping
# table of the font, falling back to U+FFFD if the glyph ID is not mapped.
#
# If +id+ is not a valid glyph ID of the font, the result of calling the
# 'font.on_missing_glyph' configuration option with the string U+FFFD is returned.
#
# The result is cached per glyph ID. This means that once a glyph ID has been requested,
# +str+ is ignored on later calls for that ID.
#
# Note: Although this method is public, it should normally not be used by application code!
def glyph(id, str = nil)
  cached = @id_to_glyph[id]
  return cached if cached

  @id_to_glyph[id] =
    if id < 0 || id >= @wrapped_font[:maxp].num_glyphs
      @pdf_object.document.config['font.on_missing_glyph'].call("\u{FFFD}", self)
    else
      str ||= +'' << (@cmap.gid_to_code(id) || 0xFFFD)
      Glyph.new(@wrapped_font, id, str)
    end
end
# Returns an array with one glyph object for each character of the UTF-8 encoded string.
#
# Characters that are not mapped by the font's Unicode mapping table are represented by the
# result of calling the 'font.on_missing_glyph' configuration option with the character. The
# result for each codepoint is cached.
def decode_utf8(str)
  str.codepoints.map do |codepoint|
    @codepoint_to_glyph[codepoint] ||= begin
      char = +'' << codepoint
      gid = @cmap[codepoint]
      if gid
        glyph(gid, char)
      else
        @pdf_object.document.config['font.on_missing_glyph'].call(char, self)
      end
    end
  end
end
# Encodes the glyph and returns the code string.
#
# The code is the glyph ID, or the glyph ID returned by the subsetter if the font is subset,
# packed as 16-bit unsigned integer in big-endian byte order. The code together with the
# glyph is cached per glyph ID.
#
# Raises a HexaPDF::Error if +glyph+ is an InvalidGlyph.
def encode(glyph)
  # This check has to be done before the cache lookup. Otherwise a cache entry that has been
  # stored under the same ID by a valid glyph would hide the invalid glyph and the code of
  # the valid glyph would be returned instead of raising the error.
  if glyph.kind_of?(InvalidGlyph)
    raise HexaPDF::Error, "Glyph for #{glyph.str.inspect} missing"
  end

  entry = (@encoded_glyphs[glyph.id] ||= begin
    code = (@subsetter ? @subsetter.use_glyph(glyph.id) : glyph.id)
    [[code].pack('n'), glyph]
  end)
  entry[0]
end
private
# Creates and returns the Type0 font dictionary representing the TrueType font.
#
# Three objects are added to the +document+: the font descriptor, the CIDFontType2 font
# referencing the descriptor, and the Type0 font referencing the CIDFont.
#
# The returned font object contains only information available at creation time, so no
# information about glyph specific attributes like widths. The missing information (final
# font name, embedded font file, widths and the /ToUnicode CMap) is added by a listener
# that is registered for the :complete_objects message of the document.
def create_pdf_object(document)
  font = @wrapped_font
  os2 = font[:'OS/2']

  descriptor = document.add({
    Type: :FontDescriptor,
    FontName: font.font_name.intern,
    FontWeight: font.weight,
    Flags: 0,
    FontBBox: font.bounding_box.map {|value| value * scaling_factor },
    ItalicAngle: font.italic_angle || 0,
    Ascent: font.ascender * scaling_factor,
    Descent: font.descender * scaling_factor,
    StemV: font.dominant_vertical_stem_width,
  })

  if os2.version >= 2
    cap_height = font.cap_height
    x_height = font.x_height
  else
    # The values are not taken from the font here, so estimate them.
    #
    # Estimate as per https://www.microsoft.com/typography/otspec/os2.htm#ch
    cap_gid = @cmap[0x0048] # H
    cap_height = (cap_gid ? font[:glyf][cap_gid].y_max : font.ascender * 0.8)
    # Estimate as per https://www.microsoft.com/typography/otspec/os2.htm#xh
    x_gid = @cmap[0x0078] # x
    x_height = (x_gid ? font[:glyf][x_gid].y_max : font.ascender * 0.5)
  end
  descriptor[:CapHeight] = cap_height * scaling_factor
  descriptor[:XHeight] = x_height * scaling_factor

  fixed_pitch = font[:post].is_fixed_pitch? || font[:hhea].num_of_long_hor_metrics == 1
  descriptor.flag(:fixed_pitch) if fixed_pitch
  italic = os2.selection_include?(:italic) || os2.selection_include?(:oblique)
  descriptor.flag(:italic) if italic
  descriptor.flag(:symbolic)

  cid_font = document.add({
    Type: :Font,
    Subtype: :CIDFontType2,
    BaseFont: descriptor[:FontName],
    FontDescriptor: descriptor,
    CIDSystemInfo: {Registry: "Adobe", Ordering: "Identity", Supplement: 0},
    CIDToGIDMap: :Identity,
  })
  type0_font = document.add({
    Type: :Font,
    Subtype: :Type0,
    BaseFont: cid_font[:BaseFont],
    Encoding: :'Identity-H',
    DescendantFonts: [cid_font],
  })
  type0_font.font_wrapper = self

  # The information that depends on the used glyphs is filled in later.
  document.register_listener(:complete_objects) do
    update_font_name(type0_font)
    embed_font(type0_font, document)
    complete_width_information(type0_font)
    create_to_unicode_cmap(type0_font, document)
  end

  type0_font
end
UPPERCASE_LETTERS = ('A'..'Z').to_a.freeze #:nodoc:

# Updates the font name with a unique tag if the font is subset.
#
# The tag consists of exactly six uppercase letters and is derived from the MD5 digest of
# the encoded glyphs (glyph IDs and their codes) and the font name. The resulting name of
# the form "TAGTAG+FontName" is set on the font dictionary +dict+, on its descendant font
# and on the font descriptor of the descendant font.
#
# Does nothing and returns +nil+ if the font is not subset.
def update_font_name(dict)
  return unless @subsetter

  font_name = @wrapped_font.font_name
  data = @encoded_glyphs.each_with_object(''.b) {|(id, v), s| s << id.to_s << v[0] }
  # The glyph codes are binary strings. So the font name has to be appended as binary string,
  # too, because a UTF-8 encoded name with non-ASCII characters would otherwise lead to an
  # Encoding::CompatibilityError.
  data << font_name.b
  hash = Digest::MD5.hexdigest(data).to_i(16)

  # Always create six letters, even if the hash value is used up before that (a remainder of
  # zero maps to 'A').
  tag = Array.new(6) do
    hash, index = hash.divmod(UPPERCASE_LETTERS.length)
    UPPERCASE_LETTERS[index]
  end.join

  name = "#{tag}+#{font_name}".intern
  dict[:BaseFont] = name
  dict[:DescendantFonts].first[:BaseFont] = name
  dict[:DescendantFonts].first[:FontDescriptor][:FontName] = name
end
# Embeds the font program into the +document+.
#
# If the font is subset, the font program built by the subsetter is embedded; otherwise the
# complete data of the IO object of the wrapped font is used. The stream object is added to
# the +document+ with the /FlateDecode filter and stored in the /FontFile2 field of the font
# descriptor of the descendant font of +dict+.
def embed_font(dict, document)
  if @subsetter
    data = @subsetter.build_font
    # /Length1 has to be the length in bytes. String#size would return the number of
    # characters which is different if the string is not a binary string.
    length = data.bytesize
    stream = HexaPDF::StreamData.new(length: length) { data }
  else
    io = @wrapped_font.io
    length = io.size
    stream = HexaPDF::StreamData.new(io, length: length)
  end
  font = document.add({Length1: length, Filter: :FlateDecode}, stream: stream)
  dict[:DescendantFonts].first[:FontDescriptor][:FontFile2] = font
end
# Adds the /DW and /W fields to the CIDFont dictionary.
#
# The integer part of the width of the glyph with ID 3 (requested with a space as string
# representation) is used as default width. All encoded glyphs that have a different width
# are collected as pairs of glyph ID (the subset glyph ID if the font is subset) and width,
# sorted, and then passed to the descendant font of +dict+ together with the default width.
def complete_width_information(dict)
  default_width = glyph(3, " ").width.to_i

  widths = []
  @encoded_glyphs.each do |id, (_code, encoded_glyph)|
    width = encoded_glyph.width
    next if width == default_width

    widths << [(@subsetter ? @subsetter.subset_glyph_id(id) : id), width]
  end
  widths.sort!

  dict[:DescendantFonts].first.set_widths(widths, default_width: default_width)
end
# Creates the /ToUnicode CMap and updates the font dictionary so that text extraction works
# correctly.
#
# The CMap maps the glyph ID (the subset glyph ID if the font is subset) of each encoded
# glyph to its Unicode codepoint. The mapping is built inside the block given to the stream
# data object, i.e. not before that block is called. The stream object is added to the
# +document+, gets the /FlateDecode filter and is stored in the /ToUnicode field of +dict+.
def create_to_unicode_cmap(dict, document)
  stream = HexaPDF::StreamData.new do
    pairs = @encoded_glyphs.keys.map do |gid|
      code = (@subsetter ? @subsetter.subset_glyph_id(gid) : gid)
      # Using 0xFFFD as mentioned in Adobe #5411, last line before section 1.5
      [code, @cmap.gid_to_code(gid) || 0xFFFD]
    end
    HexaPDF::Font::CMap.create_to_unicode_cmap(pairs.sort_by(&:first))
  end

  to_unicode = document.add({}, stream: stream)
  to_unicode.set_filter(:FlateDecode)
  dict[:ToUnicode] = to_unicode
end
end
end
end