# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2024 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see .
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at .
#++
require 'hexapdf/font/true_type/table'
module HexaPDF
module Font
module TrueType
class Table
# Generic base class for all cmap subtables.
#
# cmap format 8.0 is currently not implemented because use of the format is discouraged in
# the specification and no font with a format 8.0 cmap subtable was available for testing.
#
# The preferred cmap format is 12.0 because it supports all of Unicode and allows for fast
# and memory efficient code-to-gid as well as gid-to-code mappings.
#
# See:
# * Cmap
# * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html
class CmapSubtable
# The platform identifier for Unicode.
PLATFORM_UNICODE = 0
# The platform identifier for Microsoft.
PLATFORM_MICROSOFT = 3
# The platform identifier.
attr_accessor :platform_id
# The platform-specific encoding identifier.
attr_accessor :encoding_id
# The cmap format or +nil+ if the subtable wasn't read from a file.
attr_reader :format
# The language code.
attr_accessor :language
# The complete code map.
#
# Is only fully initialized for existing fonts when a mapping is first accessed via #[].
attr_accessor :code_map
# The complete gid map.
#
# Is only fully initialized for existing fonts when a mapping is first accessed via
# #gid_to_code.
attr_accessor :gid_map
# Creates a new subtable.
def initialize(platform_id, encoding_id)
@platform_id = platform_id
@encoding_id = encoding_id
@supported = true
@code_map = {}
@gid_map = {}
@format = nil
@language = 0
end
# Returns +true+ if this subtable contains a Unicode cmap.
def unicode?
(platform_id == PLATFORM_MICROSOFT && (encoding_id == 1 || encoding_id == 10)) ||
platform_id == PLATFORM_UNICODE
end
# Returns the glyph index for the given character code or +nil+ if the character code is
# not mapped.
def [](code)
@code_map[code]
end
# Returns a character code for the given glyph index or +nil+ if the given glyph index
# does not exist or is not mapped to a character code.
#
# Note that some fonts map multiple character codes to the same glyph (e.g. hyphen and
# minus), i.e. the code-to-glyph mapping is surjective but not injective! In such a case
# one of the available character codes is returned.
def gid_to_code(gid)
@gid_map[gid]
end
# :call-seq:
# subtable.parse!(io, offset) => true or false
#
# Parses the cmap subtable from the IO at the given offset.
#
# If the subtable format is supported, the information is used to populate this object and
# +true+ is returned. Otherwise nothing is done and +false+ is returned.
def parse(io, offset)
io.pos = offset
@format = io.read(2).unpack1('n')
if [8, 10, 12].include?(@format)
io.pos += 2
length, @language = io.read(8).unpack('N2')
elsif [0, 2, 4, 6].include?(@format)
length, @language = io.read(4).unpack('n2')
end
return false unless [0, 2, 4, 6, 10, 12].include?(@format)
offset = io.pos
@code_map = lambda do |code|
parse_mapping(io, offset, length)
@code_map[code]
end
@gid_map = lambda do |gid|
parse_mapping(io, offset, length)
@gid_map[gid]
end
true
end
def parse_mapping(io, offset, length)
io.pos = offset
@code_map, @gid_map = case @format
when 0 then Format0.parse(io, length)
when 2 then Format2.parse(io, length)
when 4 then Format4.parse(io, length)
when 6 then Format6.parse(io, length)
when 10 then Format10.parse(io, length)
when 12 then Format12.parse(io, length)
end
end
private :parse_mapping
def inspect #:nodoc:
"#<#{self.class.name} (#{platform_id}, #{encoding_id}, #{language}, " \
"#{format.inspect})>"
end
# Cmap format 0
module Format0
# :call-seq:
# Format0.parse(io, length) -> code_map
#
# Parses the format 0 cmap subtable from the given IO at the current position and
# returns the contained code map.
#
# It is assumed that the first six bytes of the subtable have already been consumed.
def self.parse(io, length)
raise HexaPDF::Error, "Invalid length #{length} for cmap format 0" if length != 262
code_map = io.read(256).unpack('C*')
gid_map = {}
code_map.each_with_index {|glyph, index| gid_map[glyph] = index }
[code_map, gid_map]
end
end
# Cmap format 2
module Format2
SubHeader = Struct.new(:first_code, :entry_count, :id_delta, :first_glyph_index)
# :call-seq:
# Format2.parse(io, length) -> code_map
#
# Parses the format 2 cmap subtable from the given IO at the current position and
# returns the contained code map.
#
# It is assumed that the first six bytes of the subtable have already been consumed.
def self.parse(io, length)
sub_header_keys = io.read(512).unpack('n*')
nr_sub_headers = 0
sub_header_keys.map! do |key|
nr_sub_headers = key if key > nr_sub_headers
key / 8
end
nr_sub_headers = 1 + nr_sub_headers / 8
sub_headers = []
nr_sub_headers.times do |i|
h = SubHeader.new(*io.read(8).unpack('n2s>n'))
# Map the currently stored id_range_offset to the corresponding glyph index by first
# changing the offset to begin from the position of the first glyph index and then
# halfing the value since each glyph is a UInt16.
h.first_glyph_index = (h.first_glyph_index - 2 - 8 * (nr_sub_headers - i - 1)) / 2
sub_headers << h
end
glyph_indexes = io.read(length - 6 - 512 - 8 * nr_sub_headers).unpack('n*')
gid_map = {}
sub_headers.each_with_index do |sub_header, i|
sub_header.entry_count.times do |j|
glyph_id = glyph_indexes[sub_header.first_glyph_index + j]
glyph_id = (glyph_id + sub_header.id_delta) % 65536 if glyph_id != 0
gid_map[glyph_id] = (sub_header_keys.index(i) << 8) + j + sub_header.first_code
end
end
[mapper(sub_header_keys, sub_headers, glyph_indexes), gid_map]
end
def self.mapper(sub_header_keys, sub_headers, glyph_indexes) #:nodoc:
Hash.new do |h, code|
i = code
i, j = i.divmod(256) if code > 255
k = sub_header_keys[i]
if !k
glyph_id = 0
elsif k > 0
sub_header = sub_headers[k]
raise HexaPDF::Error, "Second byte of character code missing" if j.nil?
j -= sub_header.first_code
if 0 <= j && j < sub_header.entry_count
glyph_id = glyph_indexes[sub_header.first_glyph_index + j]
glyph_id = (glyph_id + sub_header.id_delta) % 65536 if glyph_id != 0
else
glyph_id = 0
end
else
glyph_id = glyph_indexes[i]
end
h[code] = glyph_id unless glyph_id == 0
end
end
end
# Cmap format 4
module Format4
# :call-seq:
# Format4.parse(io, length) -> code_map
#
# Parses the format 4 cmap subtable from the given IO at the current position and
# returns the contained code map.
#
# It is assumed that the first six bytes of the subtable have already been consumed.
def self.parse(io, length)
seg_count_x2 = io.read(8).unpack1('n')
end_codes = io.read(seg_count_x2).unpack('n*')
io.pos += 2
start_codes = io.read(seg_count_x2).unpack('n*')
id_deltas = io.read(seg_count_x2).unpack('n*')
id_range_offsets = io.read(seg_count_x2).unpack('n*').map!.with_index do |offset, idx|
# Change offsets to indexes, starting from the id_range_offsets array
offset == 0 ? offset : offset / 2 + idx
end
glyph_indexes = io.read(length - 16 - seg_count_x2 * 4).unpack('n*')
mapper(end_codes, start_codes, id_deltas, id_range_offsets, glyph_indexes)
end
# :nodoc:
def self.mapper(end_codes, start_codes, id_deltas, id_range_offsets, glyph_indexes)
compute_glyph_id = lambda do |index, code|
offset = id_range_offsets[index]
if offset == 0
glyph_id = (code + id_deltas[index]) % 65536
else
glyph_id = glyph_indexes[offset - end_codes.length + (code - start_codes[index])]
glyph_id ||= 0 # Handle invalid subtable entries
glyph_id = (glyph_id + id_deltas[index]) % 65536 if glyph_id != 0
end
glyph_id
end
code_map = Hash.new do |h, code|
i = end_codes.bsearch_index {|c| c >= code }
glyph_id = (i && start_codes[i] <= code ? compute_glyph_id.call(i, code) : 0)
h[code] = glyph_id unless glyph_id == 0
end
gid_map = {}
end_codes.length.times do |i|
start_codes[i].upto(end_codes[i]) do |code|
gid_map[compute_glyph_id.call(i, code)] = code
end
end
[code_map, gid_map]
end
end
# Cmap format 6
module Format6
# :call-seq:
# Format6.parse(io, length) -> code_map
#
# Parses the format 6 cmap subtable from the given IO at the current position and
# returns the contained code map.
#
# It is assumed that the first six bytes of the subtable have already been consumed.
def self.parse(io, _length)
first_code, entry_count = io.read(4).unpack('n2')
code_map = io.read(2 * entry_count).unpack('n*')
gid_map = {}
code_map = code_map.each_with_index.with_object({}) do |(g, i), hash|
hash[first_code + i] = g
gid_map[g] = first_code + i
end
[code_map, gid_map]
end
end
# Cmap format 10
module Format10
# :call-seq:
# Format10.parse(io, length) -> code_map
#
# Parses the format 10 cmap subtable from the given IO at the current position and
# returns the contained code map.
#
# It is assumed that the first twelve bytes of the subtable have already been consumed.
def self.parse(io, _length)
first_code, entry_count = io.read(8).unpack('N2')
code_map = io.read(2 * entry_count).unpack('n*')
gid_map = {}
code_map = code_map.each_with_index.with_object({}) do |(g, i), hash|
hash[first_code + i] = g
gid_map[g] = first_code + i
end
[code_map, gid_map]
end
end
# Cmap format 12
module Format12
# :call-seq:
# Format12.parse(io, length) -> code_map
#
# Parses the format 12 cmap subtable from the given IO at the current position and
# returns the contained code map.
#
# It is assumed that the first twelve bytes of the subtable have already been consumed.
def self.parse(io, _length)
mapper(Array.new(io.read(4).unpack1('N')) { io.read(12).unpack('N3') })
end
# The parameter +groups+ is an array containing [start_code, end_code, start_glyph_id]
# arrays.
def self.mapper(groups) #:nodoc:
code_map = Hash.new do |h, code|
group = groups.bsearch {|g| g[1] >= code }
h[code] = group[2] + (code - group[0]) if group && group[0] <= code
end
groups_by_gid = groups.sort_by {|g| g[2] }
gid_map = Hash.new do |h, gid|
group = groups_by_gid.bsearch {|g| g[2] + g[1] - g[0] >= gid }
h[gid] = group[0] + (gid - group[2]) if group && group[2] <= gid
end
[code_map, gid_map]
end
end
end
end
end
end
end