# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2022 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
#++
require 'hexapdf/font/cmap'
module HexaPDF
module Font
class CMap
# Creates a CMap file.
#
# Currently only ToUnicode CMaps are supported.
class Writer

  # Maximum number of entries in one section.
  MAX_ENTRIES_IN_SECTION = 100

  # Returns a ToUnicode CMap for the given input code to Unicode codepoint mapping.
  #
  # +mapping+::
  #     An array of <tt>[code, codepoint]</tt> integer pairs which needs to be sorted by
  #     input codes.
  #
  # Single mappings are written to "bfchar" sections, runs of consecutive mappings to
  # "bfrange" sections. An empty mapping yields a CMap without any such sections.
  #
  # Note that the returned CMap always uses a 16-bit input code space!
  def create_to_unicode_cmap(mapping)
    return to_unicode_template % '' if mapping.empty?

    chars, ranges = compute_section_entries(mapping)

    bfchar = create_sections("bfchar", chars.size / 2) do |index|
      code, value = chars[index * 2, 2]
      format("<%04X><%s>\n", code, utf16be_hex(value))
    end
    bfrange = create_sections("bfrange", ranges.size / 3) do |index|
      first_code, last_code, value = ranges[index * 3, 3]
      format("<%04X><%04X><%s>\n", first_code, last_code, utf16be_hex(value))
    end

    # The template supplies the newline after the placeholder itself, so the trailing
    # newline of the last section is removed.
    to_unicode_template % (bfchar << bfrange).chomp
  end

  private

  # Returns the lowercase hex representation of the UTF-16BE encoding of the given Unicode
  # codepoint (codepoints outside the BMP result in a surrogate pair, i.e. eight hex digits).
  def utf16be_hex(codepoint)
    ((+'').force_encoding(::Encoding::UTF_16BE) << codepoint).unpack1('H*')
  end

  # Computes the entries for the "char" and "range" sections based on the given mapping.
  #
  # Returns two arrays +char_mappings+ and +range_mappings+ where +char_mappings+ is an array
  # of the form
  #
  #   [code1, value1, code2, value2, ...]
  #
  # and +range_mappings+ an array of the form
  #
  #   [start1, end1, value1, start2, end2, value2, ...]
  #
  # A range is only continued while both the code and the value increase by exactly one and
  # neither of them wraps its lowest byte. The value restriction is needed because a
  # "bfrange" entry only increments the last byte of the destination string; letting that
  # byte overflow would result in an undefined mapping.
  def compute_section_entries(mapping)
    chars = []
    ranges = []
    return [chars, ranges] if mapping.empty?

    last_code, last_value = *mapping[0]
    is_range = false
    mapping.drop(1).each do |code, value|
      if last_code + 1 == code && last_value + 1 == value &&
          code % 256 != 0 && value % 256 != 0
        # Start a new range with a placeholder for the still unknown end code.
        ranges << last_code << nil << last_value unless is_range
        is_range = true
      elsif is_range
        # The previous entry was the last one of the current range.
        ranges[-2] = last_code
        is_range = false
      else
        chars << last_code << last_value
      end
      last_code = code
      last_value = value
    end

    # Handle last remaining mapping
    if is_range
      ranges[-2] = last_code
    else
      chars << last_code << last_value
    end

    [chars, ranges]
  end

  # Creates one or more sections of a CMap file and returns the resulting string.
  #
  # +type+::
  #     The name of the section, e.g. "bfchar" or "bfrange".
  #
  # +size+::
  #     The total number of entries of this type. Used for determining when to start a new
  #     section.
  #
  # The method makes sure that no section has more than the maximum number of allowed entries.
  #
  # Numbers from 0 up to size - 1 are yielded, indicating the current entry that should be
  # processed and for which an appropriate section line should be returned from the block.
  #
  # An empty (but mutable) string is returned if +size+ is zero.
  def create_sections(type, size)
    result = +''
    (0...size).each_slice(MAX_ENTRIES_IN_SECTION) do |indices|
      result << "#{indices.size} begin#{type}\n"
      indices.each {|index| result << yield(index) }
      result << "end#{type}\n"
    end
    result
  end

  # Returns the CMap file template for a ToUnicode CMap.
  #
  # The template is a format string with a single +%s+ placeholder for the "bfchar" and
  # "bfrange" sections. It declares the 16-bit code space <0000> to <FFFF>.
  def to_unicode_template
    <<~TEMPLATE
      /CIDInit /ProcSet findresource begin
      12 dict begin
      begincmap
      /CIDSystemInfo
      << /Registry (Adobe)
      /Ordering (UCS)
      /Supplement 0
      >> def
      /CMapName /Adobe-Identity-UCS def
      /CMapType 2 def
      1 begincodespacerange
      <0000> <FFFF>
      endcodespacerange
      %s
      endcmap
      CMapName currentdict /CMap defineresource pop
      end
      end
    TEMPLATE
  end

end
end
end
end