# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2024 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see .
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at .
#++

require 'hexapdf/font/true_type/table'

module HexaPDF
  module Font
    module TrueType
      class Table

        # Generic base class for all cmap subtables.
        #
        # cmap format 8.0 is currently not implemented because use of the format is discouraged in
        # the specification and no font with a format 8.0 cmap subtable was available for testing.
        #
        # The preferred cmap format is 12.0 because it supports all of Unicode and allows for fast
        # and memory efficient code-to-gid as well as gid-to-code mappings.
        #
        # See:
        # * Cmap
        # * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html
        class CmapSubtable

          # The platform identifier for Unicode.
          PLATFORM_UNICODE = 0

          # The platform identifier for Microsoft.
          PLATFORM_MICROSOFT = 3

          # The cmap subtable formats for which #parse can build the mappings.
          SUPPORTED_FORMATS = [0, 2, 4, 6, 10, 12].freeze

          # The platform identifier.
          attr_accessor :platform_id

          # The platform-specific encoding identifier.
          attr_accessor :encoding_id

          # The cmap format or +nil+ if the subtable wasn't read from a file.
          attr_reader :format

          # The language code.
          attr_accessor :language

          # The complete code map.
          #
          # Is only fully initialized for existing fonts when a mapping is first accessed via #[].
          attr_accessor :code_map

          # The complete gid map.
          #
          # Is only fully initialized for existing fonts when a mapping is first accessed via
          # #gid_to_code.
          attr_accessor :gid_map

          # Creates a new subtable for the given platform and encoding identifiers, with empty
          # code and gid maps, no format and a language code of 0.
          def initialize(platform_id, encoding_id)
            @platform_id = platform_id
            @encoding_id = encoding_id
            @supported = true
            @code_map = {}
            @gid_map = {}
            @format = nil
            @language = 0
          end

          # Returns +true+ if this subtable contains a Unicode cmap.
          def unicode?
            (platform_id == PLATFORM_MICROSOFT && (encoding_id == 1 || encoding_id == 10)) ||
              platform_id == PLATFORM_UNICODE
          end

          # Returns the glyph index for the given character code or +nil+ if the character code is
          # not mapped.
          def [](code)
            @code_map[code]
          end

          # Returns a character code for the given glyph index or +nil+ if the given glyph index
          # does not exist or is not mapped to a character code.
          #
          # Note that some fonts map multiple character codes to the same glyph (e.g. hyphen and
          # minus), i.e. the code-to-glyph mapping is surjective but not injective! In such a case
          # one of the available character codes is returned.
          def gid_to_code(gid)
            @gid_map[gid]
          end

          # :call-seq:
          #   subtable.parse(io, offset)     => true or false
          #
          # Parses the cmap subtable from the IO at the given offset.
          #
          # If the subtable format is supported, the information is used to populate this object and
          # +true+ is returned. Otherwise nothing is done and +false+ is returned.
          #
          # Only the subtable header is read here. The mapping data itself is read the first time
          # one of the two maps is queried (see the lambdas below).
          def parse(io, offset)
            io.pos = offset
            @format = io.read(2).unpack1('n')
            case @format
            when 8, 10, 12
              # Skip the two bytes after the format field; length and language are 32-bit values.
              io.pos += 2
              length, @language = io.read(8).unpack('N2')
            when 0, 2, 4, 6
              # Length and language are 16-bit values.
              length, @language = io.read(4).unpack('n2')
            end
            return false unless SUPPORTED_FORMATS.include?(@format)

            offset = io.pos
            # Both maps start out as lambdas that answer to #[] like the real maps do. The first
            # lookup through either one parses the mapping data, which replaces both instance
            # variables with the real maps, and then repeats the lookup on the real map.
            @code_map = lambda do |code|
              parse_mapping(io, offset, length)
              @code_map[code]
            end
            @gid_map = lambda do |gid|
              parse_mapping(io, offset, length)
              @gid_map[gid]
            end

            true
          end

          # Positions the IO at +offset+ and lets the parser module for the current format read
          # the mapping data, storing the returned code map and gid map in this object.
          def parse_mapping(io, offset, length)
            io.pos = offset
            @code_map, @gid_map = case @format
                                  when 0 then Format0.parse(io, length)
                                  when 2 then Format2.parse(io, length)
                                  when 4 then Format4.parse(io, length)
                                  when 6 then Format6.parse(io, length)
                                  when 10 then Format10.parse(io, length)
                                  when 12 then Format12.parse(io, length)
                                  end
          end
          private :parse_mapping

          def inspect #:nodoc:
            "#<#{self.class.name} (#{platform_id}, #{encoding_id}, #{language}, " \
              "#{format.inspect})>"
          end

          # Cmap format 0
          module Format0

            # :call-seq:
            #   Format0.parse(io, length)    -> [code_map, gid_map]
            #
            # Parses the format 0 cmap subtable from the given IO at the current position and
            # returns the contained code map together with the reverse gid map.
            #
            # It is assumed that the first six bytes of the subtable have already been consumed.
            #
            # Raises a HexaPDF::Error if +length+ is not 262.
            def self.parse(io, length)
              raise HexaPDF::Error, "Invalid length #{length} for cmap format 0" if length != 262
              # The code map is the plain byte array: the array index is the character code.
              code_map = io.read(256).unpack('C*')
              gid_map = {}
              # If several codes map to the same glyph, the last one wins.
              code_map.each_with_index {|glyph, index| gid_map[glyph] = index }
              [code_map, gid_map]
            end

          end

          # Cmap format 2
          module Format2

            SubHeader = Struct.new(:first_code, :entry_count, :id_delta, :first_glyph_index)

            # :call-seq:
            #   Format2.parse(io, length)    -> [code_map, gid_map]
            #
            # Parses the format 2 cmap subtable from the given IO at the current position and
            # returns the contained code map together with the reverse gid map.
            #
            # It is assumed that the first six bytes of the subtable have already been consumed.
            def self.parse(io, length)
              sub_header_keys = io.read(512).unpack('n*')
              nr_sub_headers = 0
              # The stored keys are sub-header indexes multiplied by 8; remember the largest one
              # to derive the number of sub-headers and convert all keys to plain indexes.
              sub_header_keys.map! do |key|
                nr_sub_headers = key if key > nr_sub_headers
                key / 8
              end
              nr_sub_headers = 1 + nr_sub_headers / 8

              sub_headers = Array.new(nr_sub_headers) do |i|
                h = SubHeader.new(*io.read(8).unpack('n2s>n'))
                # Map the currently stored id_range_offset to the corresponding glyph index by first
                # changing the offset to begin from the position of the first glyph index and then
                # halfing the value since each glyph is a UInt16.
                h.first_glyph_index = (h.first_glyph_index - 2 - 8 * (nr_sub_headers - i - 1)) / 2
                h
              end
              glyph_indexes = io.read(length - 6 - 512 - 8 * nr_sub_headers).unpack('n*')

              gid_map = {}
              sub_headers.each_with_index do |sub_header, i|
                # The first byte value that selects this sub-header; looked up once per sub-header
                # instead of once per entry.
                high_byte = sub_header_keys.index(i)
                # A sub-header that no first byte refers to cannot contribute any character code.
                next unless high_byte
                base_code = (high_byte << 8) + sub_header.first_code
                sub_header.entry_count.times do |j|
                  # Entries pointing past the end of the glyph index array are treated as glyph 0.
                  glyph_id = glyph_indexes[sub_header.first_glyph_index + j] || 0
                  glyph_id = (glyph_id + sub_header.id_delta) % 65536 if glyph_id != 0
                  gid_map[glyph_id] = base_code + j
                end
              end

              [mapper(sub_header_keys, sub_headers, glyph_indexes), gid_map]
            end

            # Returns a hash that computes (and caches) the glyph index for a character code on
            # first access. Codes that resolve to glyph 0 are not stored and yield +nil+.
            def self.mapper(sub_header_keys, sub_headers, glyph_indexes) #:nodoc:
              Hash.new do |h, code|
                i = code
                # Codes above 255 are split into a first byte (i) and a second byte (j).
                i, j = i.divmod(256) if code > 255
                k = sub_header_keys[i]
                if !k
                  glyph_id = 0
                elsif k > 0
                  sub_header = sub_headers[k]
                  raise HexaPDF::Error, "Second byte of character code missing" if j.nil?
                  j -= sub_header.first_code
                  if 0 <= j && j < sub_header.entry_count
                    # Entries pointing past the end of the glyph index array are treated as
                    # glyph 0, i.e. as not mapped.
                    glyph_id = glyph_indexes[sub_header.first_glyph_index + j] || 0
                    glyph_id = (glyph_id + sub_header.id_delta) % 65536 if glyph_id != 0
                  else
                    glyph_id = 0
                  end
                else
                  glyph_id = glyph_indexes[i]
                end
                h[code] = glyph_id unless glyph_id == 0
              end
            end

          end

          # Cmap format 4
          module Format4

            # :call-seq:
            #   Format4.parse(io, length)    -> [code_map, gid_map]
            #
            # Parses the format 4 cmap subtable from the given IO at the current position and
            # returns the contained code map together with the reverse gid map.
            #
            # It is assumed that the first six bytes of the subtable have already been consumed.
            def self.parse(io, length)
              # Eight bytes are read but only the first 16-bit value (segment count * 2) is used.
              seg_count_x2 = io.read(8).unpack1('n')
              end_codes = io.read(seg_count_x2).unpack('n*')
              io.pos += 2 # two bytes between the end codes and the start codes are skipped
              start_codes = io.read(seg_count_x2).unpack('n*')
              id_deltas = io.read(seg_count_x2).unpack('n*')
              id_range_offsets = io.read(seg_count_x2).unpack('n*').map!.with_index do |offset, idx|
                # Change offsets to indexes, starting from the id_range_offsets array
                offset == 0 ? offset : offset / 2 + idx
              end
              glyph_indexes = io.read(length - 16 - seg_count_x2 * 4).unpack('n*')
              mapper(end_codes, start_codes, id_deltas, id_range_offsets, glyph_indexes)
            end

            # Builds the lazily computed code map and the eagerly computed gid map from the
            # parsed segment arrays.
            def self.mapper(end_codes, start_codes, id_deltas, id_range_offsets, glyph_indexes) #:nodoc:
              compute_glyph_id = lambda do |index, code|
                offset = id_range_offsets[index]
                if offset == 0
                  glyph_id = (code + id_deltas[index]) % 65536
                else
                  glyph_id = glyph_indexes[offset - end_codes.length + (code - start_codes[index])]
                  glyph_id ||= 0 # Handle invalid subtable entries
                  glyph_id = (glyph_id + id_deltas[index]) % 65536 if glyph_id != 0
                end
                glyph_id
              end

              code_map = Hash.new do |h, code|
                # Find the first segment whose end code is not smaller than the code.
                i = end_codes.bsearch_index {|c| c >= code }
                glyph_id = (i && start_codes[i] <= code ? compute_glyph_id.call(i, code) : 0)
                h[code] = glyph_id unless glyph_id == 0
              end

              gid_map = {}
              end_codes.length.times do |i|
                start_codes[i].upto(end_codes[i]) do |code|
                  gid_map[compute_glyph_id.call(i, code)] = code
                end
              end

              [code_map, gid_map]
            end

          end

          # Cmap format 6
          module Format6

            # :call-seq:
            #   Format6.parse(io, length)    -> [code_map, gid_map]
            #
            # Parses the format 6 cmap subtable from the given IO at the current position and
            # returns the contained code map together with the reverse gid map.
            #
            # It is assumed that the first six bytes of the subtable have already been consumed.
            def self.parse(io, _length)
              first_code, entry_count = io.read(4).unpack('n2')
              code_map = io.read(2 * entry_count).unpack('n*')
              gid_map = {}
              # The n-th glyph index belongs to the character code first_code + n.
              code_map = code_map.each_with_index.with_object({}) do |(g, i), hash|
                hash[first_code + i] = g
                gid_map[g] = first_code + i
              end
              [code_map, gid_map]
            end

          end

          # Cmap format 10
          module Format10

            # :call-seq:
            #   Format10.parse(io, length)    -> [code_map, gid_map]
            #
            # Parses the format 10 cmap subtable from the given IO at the current position and
            # returns the contained code map together with the reverse gid map.
            #
            # It is assumed that the first twelve bytes of the subtable have already been consumed.
            def self.parse(io, _length)
              first_code, entry_count = io.read(8).unpack('N2')
              code_map = io.read(2 * entry_count).unpack('n*')
              gid_map = {}
              # The n-th glyph index belongs to the character code first_code + n.
              code_map = code_map.each_with_index.with_object({}) do |(g, i), hash|
                hash[first_code + i] = g
                gid_map[g] = first_code + i
              end
              [code_map, gid_map]
            end

          end

          # Cmap format 12
          module Format12

            # :call-seq:
            #   Format12.parse(io, length)    -> [code_map, gid_map]
            #
            # Parses the format 12 cmap subtable from the given IO at the current position and
            # returns the contained code map together with the reverse gid map.
            #
            # It is assumed that the first twelve bytes of the subtable have already been consumed.
            def self.parse(io, _length)
              # A 32-bit group count is followed by that many groups of three 32-bit values.
              mapper(Array.new(io.read(4).unpack1('N')) { io.read(12).unpack('N3') })
            end

            # The parameter +groups+ is an array containing [start_code, end_code, start_glyph_id]
            # arrays.
            #
            # Both returned hashes compute and cache their values on first access by binary
            # searching the groups.
            def self.mapper(groups) #:nodoc:
              code_map = Hash.new do |h, code|
                group = groups.bsearch {|g| g[1] >= code }
                h[code] = group[2] + (code - group[0]) if group && group[0] <= code
              end
              # The reverse lookup needs the groups ordered by their first glyph index.
              groups_by_gid = groups.sort_by {|g| g[2] }
              gid_map = Hash.new do |h, gid|
                group = groups_by_gid.bsearch {|g| g[2] + g[1] - g[0] >= gid }
                h[gid] = group[0] + (gid - group[2]) if group && group[2] <= gid
              end
              [code_map, gid_map]
            end

          end

        end

      end
    end
  end
end