# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2019 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
#++
require 'hexapdf/error'
require 'hexapdf/data_dir'
module HexaPDF
module Font
# Represents a CMap, a mapping from character codes to CIDs (character IDs) or to their Unicode
# value.
#
# See: PDF1.7 s9.7.5, s9.10.3; Adobe Technical Notes #5014 and #5411
class CMap
# The parser and writer implementations are registered for autoloading, i.e. their files are
# only loaded when the constants are referenced for the first time.
autoload(:Parser, 'hexapdf/font/cmap/parser')
autoload(:Writer, 'hexapdf/font/cmap/writer')
# Directory in which ::predefined? and ::for_name look for predefined CMap files.
CMAP_DIR = File.join(HexaPDF.data_dir, 'cmap') #:nodoc:
# Class-level cache of already parsed predefined CMaps, keyed by name (filled by ::for_name).
@cmap_cache = {}
# Returns +true+ if the given name specifies a predefined CMap, i.e. if a regular file with
# that name exists in the CMap data directory.
#
# Directories don't count as CMaps, so names like an empty string or '.' yield +false+.
def self.predefined?(name)
  File.file?(File.join(CMAP_DIR, name))
end
# Creates a new CMap object by parsing a predefined CMap with the given name.
#
# The parsed CMap is cached, so subsequent calls with the same name return the same object
# without reading the file again.
#
# Raises a HexaPDF::Error if the given CMap is not found. A name that resolves to something
# other than a regular file (e.g. an empty name resolving to the data directory itself) is
# treated as not found instead of failing later while reading.
def self.for_name(name)
  return @cmap_cache[name] if @cmap_cache.key?(name)

  file = File.join(CMAP_DIR, name)
  raise HexaPDF::Error, "No CMap named '#{name}' found" unless File.file?(file)

  @cmap_cache[name] = parse(File.read(file, encoding: ::Encoding::UTF_8))
end
# Parses the given string, which has to contain a valid CMap file, with a fresh Parser
# instance and returns the parser's result.
def self.parse(string)
  parser = Parser.new
  parser.parse(string)
end
# Builds a ToUnicode CMap for the given code to Unicode codepoint mapping by delegating to a
# fresh Writer instance and returns its result.
#
# See: Writer#create_to_unicode_cmap
def self.create_to_unicode_cmap(mapping)
  writer = Writer.new
  writer.create_to_unicode_cmap(mapping)
end
# The registry part of the CMap version.
attr_accessor :registry
# The ordering part of the CMap version.
attr_accessor :ordering
# The supplement part of the CMap version.
attr_accessor :supplement
# The name of the CMap.
attr_accessor :name
# The writing mode of the CMap: 0 for horizontal, 1 for vertical writing.
attr_accessor :wmode
# The following readers expose the internal mapping data. They are made protected below so
# that #use_cmap can read the data of another CMap instance.
attr_reader :codespace_ranges # :nodoc:
attr_reader :cid_mapping # :nodoc:
attr_reader :cid_range_mappings # :nodoc:
attr_reader :unicode_mapping # :nodoc:
protected :codespace_ranges, :cid_mapping, :cid_range_mappings, :unicode_mapping
# Creates a new, empty CMap object without any codespace ranges or mappings.
def initialize
  # Code to CID data: individual mappings and range mappings.
  @cid_mapping = {}
  @cid_range_mappings = []
  # Code to Unicode string mappings.
  @unicode_mapping = {}
  # Codespace ranges used by #read_codes for splitting a string into codes.
  @codespace_ranges = []
end
# Adds the codespace ranges and all mappings of the given CMap to this CMap.
#
# Ranges are appended to the existing ones; individual mappings of the given CMap overwrite
# mappings for the same code in this CMap.
def use_cmap(cmap)
  @codespace_ranges.concat(cmap.codespace_ranges)
  @cid_mapping.update(cmap.cid_mapping)
  @cid_range_mappings.concat(cmap.cid_range_mappings)
  @unicode_mapping.update(cmap.unicode_mapping)
end
# Registers a codespace range that is described by one range per byte of a code.
#
# +first+ is the range for the first byte, the ranges in +rest+ are checked against the
# second, third, ... byte in order. A single-byte codespace range has no +rest+ ranges.
def add_codespace_range(first, *rest)
  @codespace_ranges.push([first, rest])
end
# Parses the string and returns all character codes.
#
# At each position the codespace ranges are tried in the order in which they were added and
# the first one that matches all of its bytes determines the code. A range that matches only
# some of its bytes doesn't consume anything, so another range sharing the same first byte
# still gets a chance to match.
#
# An error is raised if the string contains invalid bytes or if it ends in the middle of a
# code.
def read_codes(string)
  bytes = string.bytes
  codes = []
  pos = 0

  while pos < bytes.size
    truncated = false
    # The byte reported on failure: the first byte unless a later byte of a candidate range
    # turns out to be the offending one.
    invalid_byte = bytes[pos]

    match = @codespace_ranges.find do |first_byte_range, rest_ranges|
      next false unless first_byte_range.cover?(bytes[pos])

      # Only look ahead here; bytes are consumed below once a range has matched completely.
      rest_ranges.each_with_index.all? do |range, index|
        byte = bytes[pos + index + 1]
        if byte.nil?
          truncated = true
          false
        elsif range.cover?(byte)
          true
        else
          invalid_byte = byte
          false
        end
      end
    end

    if match
      length = match[1].size + 1
      codes << bytes[pos, length].inject(0) {|code, byte| (code << 8) + byte }
      pos += length
    elsif truncated
      raise HexaPDF::Error, "Missing bytes while reading codes via CMap"
    else
      raise HexaPDF::Error, "Invalid byte while reading codes via CMap: #{invalid_byte}"
    end
  end

  codes
end
# Stores a mapping from the single character code +code+ to the CID +cid+, replacing any
# individual mapping that already exists for that code.
def add_cid_mapping(code, cid)
  @cid_mapping.store(code, cid)
end
# Adds a CID range: the character codes from +start_code+ to +end_code+ (inclusive) are
# mapped to consecutive CIDs beginning with +start_cid+.
def add_cid_range(start_code, end_code, start_cid)
  code_range = Range.new(start_code, end_code)
  @cid_range_mappings.push([code_range, start_cid])
end
# Returns the CID for the given character code, or 0 if no mapping was found.
#
# Individual mappings take precedence over range mappings. The range mappings are searched
# from the most recently added one backwards, so later ranges win over earlier ones.
def to_cid(code)
  direct = @cid_mapping.fetch(code, -1)
  return direct unless direct == -1

  entry = @cid_range_mappings.reverse_each.find {|range, _start_cid| range.cover?(code) }
  return 0 unless entry

  range, start_cid = entry
  cid = start_cid + code - range.first
  # -1 is the internal "not found" marker and is always reported as 0.
  cid == -1 ? 0 : cid
end
# Stores a mapping from the character code +code+ to the Unicode string +string+ (in UTF-8
# encoding), replacing any existing mapping for that code.
def add_unicode_mapping(code, string)
  @unicode_mapping.store(code, string)
end
# Looks up the Unicode string (in UTF-8 encoding) that was registered for the given character
# code. Returns +nil+ if there is no mapping for the code.
def to_unicode(code)
  @unicode_mapping[code]
end
end
end
end