# prawn/font/ttf.rb : Implements AFM font support for Prawn # # Copyright May 2008, Gregory Brown / James Healy / Jamis Buck # All Rights Reserved. # # This is free software. Please see the LICENSE and COPYING files for details. require 'ttfunk' require 'ttfunk/subset_collection' module Prawn class Font # @private class TTF < Font attr_reader :ttf, :subsets def unicode? true end def initialize(document, name, options = {}) super @ttf = read_ttf_file @subsets = TTFunk::SubsetCollection.new(@ttf) @attributes = {} @bounding_boxes = {} @char_widths = {} @has_kerning_data = @ttf.kerning.exists? && @ttf.kerning.tables.any? @ascender = Integer(@ttf.ascent * scale_factor) @descender = Integer(@ttf.descent * scale_factor) @line_gap = Integer(@ttf.line_gap * scale_factor) end # NOTE: +string+ must be UTF8-encoded. def compute_width_of(string, options = {}) #:nodoc: scale = (options[:size] || size) / 1000.0 if options[:kerning] kern(string).inject(0) do |s, r| if r.is_a?(Numeric) s - r else r.inject(s) { |a, e| a + character_width_by_code(e) } end end * scale else string.codepoints.inject(0) do |s, r| s + character_width_by_code(r) end * scale end end # The font bbox, as an array of integers # def bbox @bbox ||= @ttf.bbox.map { |i| Integer(i * scale_factor) } end # Returns true if the font has kerning data, false otherwise def has_kerning_data? @has_kerning_data end # Perform any changes to the string that need to happen # before it is rendered to the canvas. Returns an array of # subset "chunks", where the even-numbered indices are the # font subset number, and the following entry element is # either a string or an array (for kerned text). # # The +text+ parameter must be UTF8-encoded. # def encode_text(text, options = {}) text = text.chomp if options[:kerning] last_subset = nil kern(text).inject([]) do |result, element| if element.is_a?(Numeric) unless result.last[1].is_a?(Array) result.last[1] = [result.last[1]] end result.last[1] << element result else encoded = @subsets.encode(element) if encoded.first[0] == last_subset result.last[1] << encoded.first[1] encoded.shift end if encoded.any? last_subset = encoded.last[0] result + encoded else result end end end else @subsets.encode(text.unpack('U*')) end end def basename @basename ||= @ttf.name.postscript_name end # not sure how to compute this for true-type fonts... def stemV # rubocop: disable Style/MethodName 0 end def italic_angle return @italic_angle if @italic_angle if @ttf.postscript.exists? raw = @ttf.postscript.italic_angle hi = raw >> 16 low = raw & 0xFF hi = -((hi ^ 0xFFFF) + 1) if hi & 0x8000 != 0 @italic_angle = "#{hi}.#{low}".to_f else @italic_angle = 0 end @italic_angle end def cap_height @cap_height ||= begin height = @ttf.os2.exists? && @ttf.os2.cap_height || 0 height.zero? ? @ascender : height end end def x_height # FIXME: seems like if os2 table doesn't exist, we could # just find the height of the lower-case 'x' glyph? @ttf.os2.exists? && @ttf.os2.x_height || 0 end def family_class @family_class ||= (@ttf.os2.exists? && @ttf.os2.family_class || 0) >> 8 end def serif? @serif ||= [1, 2, 3, 4, 5, 7].include?(family_class) end def script? @script ||= family_class == 10 end def pdf_flags @flags ||= begin flags = 0 flags |= 0x0001 if @ttf.postscript.fixed_pitch? flags |= 0x0002 if serif? flags |= 0x0008 if script? flags |= 0x0040 if italic_angle != 0 # Assume the font contains at least some non-latin characters flags | 0x0004 end end def normalize_encoding(text) text.encode(::Encoding::UTF_8) rescue => e puts e raise Prawn::Errors::IncompatibleStringEncoding, 'Encoding ' \ "#{text.encoding} can not be transparently converted to UTF-8. " \ 'Please ensure the encoding of the string you are attempting ' \ 'to use is set correctly' end def to_utf8(text) text.encode('UTF-8') end def glyph_present?(char) code = char.codepoints.first cmap[code] > 0 end # Returns the number of characters in +str+ (a UTF-8-encoded string). # def character_count(str) str.length end private def cmap (@cmap ||= @ttf.cmap.unicode.first) || raise('no unicode cmap for font') end # +string+ must be UTF8-encoded. # # Returns an array. If an element is a numeric, it represents the # kern amount to inject at that position. Otherwise, the element # is an array of UTF-16 characters. def kern(string) a = [] string.each_codepoint do |r| if a.empty? a << [r] elsif (kern = kern_pairs_table[[cmap[a.last.last], cmap[r]]]) kern *= scale_factor a << -kern << [r] else a.last << r end end a end def kern_pairs_table @kerning_data ||= if has_kerning_data? @ttf.kerning.tables.first.pairs else {} end end def hmtx @hmtx ||= @ttf.horizontal_metrics end def character_width_by_code(code) return 0 unless cmap[code] # Some TTF fonts have nonzero widths for \n (UTF-8 / ASCII code: 10). # Patch around this as we'll never be drawing a newline with a width. return 0.0 if code == 10 @char_widths[code] ||= Integer(hmtx.widths[cmap[code]] * scale_factor) end def scale_factor @scale ||= 1000.0 / @ttf.header.units_per_em end def register(subset) temp_name = @ttf.name.postscript_name.delete("\0").to_sym ref = @document.ref!(Type: :Font, BaseFont: temp_name) # Embed the font metrics in the document after everything has been # drawn, just before the document is emitted. @document.renderer.before_render { |_doc| embed(ref, subset) } ref end def embed(reference, subset) font_content = @subsets[subset].encode # FIXME: we need postscript_name and glyph widths from the font # subset. Perhaps this could be done by querying the subset, # rather than by parsing the font that the subset produces? font = TTFunk::File.new(font_content) # empirically, it looks like Adobe Reader will not display fonts # if their font name is more than 33 bytes long. Strange. But true. basename = font.name.postscript_name[0, 33].delete("\0") raise "Can't detect a postscript name for #{file}" if basename.nil? fontfile = @document.ref!(Length1: font_content.size) fontfile.stream << font_content fontfile.stream.compress! descriptor = @document.ref!( Type: :FontDescriptor, FontName: basename.to_sym, FontFile2: fontfile, FontBBox: bbox, Flags: pdf_flags, StemV: stemV, ItalicAngle: italic_angle, Ascent: @ascender, Descent: @descender, CapHeight: cap_height, XHeight: x_height ) hmtx = font.horizontal_metrics widths = font.cmap.tables.first.code_map.map do |gid| Integer(hmtx.widths[gid] * scale_factor) end[32..-1] # It would be nice to have Encoding set for the macroman subsets, # and only do a ToUnicode cmap for non-encoded unicode subsets. # However, apparently Adobe Reader won't render MacRoman encoded # subsets if original font contains unicode characters. (It has to # be some flag or something that ttfunk is simply copying over... # but I can't figure out which flag that is.) # # For now, it's simplest to just create a unicode cmap for every font. # It offends my inner purist, but it'll do. map = @subsets[subset].to_unicode_map ranges = [[]] map.keys.sort.inject('') do |_s, code| ranges << [] if ranges.last.length >= 100 unicode = map[code] ranges.last << format('<%02x><%04x>', code, unicode) end range_blocks = ranges.inject('') do |s, list| s << format( "%d beginbfchar\n%s\nendbfchar\n", list.length, list.join("\n") ) end to_unicode_cmap = UNICODE_CMAP_TEMPLATE % range_blocks.strip cmap = @document.ref!({}) cmap << to_unicode_cmap cmap.stream.compress! reference.data.update( Subtype: :TrueType, BaseFont: basename.to_sym, FontDescriptor: descriptor, FirstChar: 32, LastChar: 255, Widths: @document.ref!(widths), ToUnicode: cmap ) end UNICODE_CMAP_TEMPLATE = <<-STR.strip.gsub(/^\s*/, '') /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo << /Registry (Adobe) /Ordering (UCS) /Supplement 0 >> def /CMapName /Adobe-Identity-UCS def /CMapType 2 def 1 begincodespacerange <00> endcodespacerange %s endcmap CMapName currentdict /CMap defineresource pop end end STR def read_ttf_file TTFunk::File.open(@name) end end end end