require 'ttfunk' require 'ttfunk/subset_collection' module Prawn class Font class TTF < Font attr_reader :ttf, :subsets def initialize(document, name, options={}) super @ttf = read_ttf_file @subsets = TTFunk::SubsetCollection.new(@ttf) @attributes = {} @bounding_boxes = {} @char_widths = {} @has_kerning_data = @ttf.kerning.exists? && @ttf.kerning.tables.any? @ascender = Integer(@ttf.ascent * scale_factor) @descender = Integer(@ttf.descent * scale_factor) @line_gap = Integer(@ttf.line_gap * scale_factor) end # +string+ must be UTF8-encoded. def width_of(string, options={}) scale = (options[:size] || size) / 1000.0 if options[:kerning] kern(string).inject(0) do |s,r| if r.is_a?(Numeric) s - r else r.inject(s) { |s2, u| s2 + character_width_by_code(u) } end end * scale else string.unpack("U*").inject(0) do |s,r| s + character_width_by_code(r) end * scale end end def bbox @bbox ||= @ttf.bbox.map { |i| Integer(i * scale_factor) } end def has_kerning_data? @has_kerning_data end # Perform any changes to the string that need to happen # before it is rendered to the canvas. Returns an array of # subset "chunks", where the even-numbered indices are the # font subset number, and the following entry element is # either a string or an array (for kerned text). # # The +text+ parameter must be UTF8-encoded. def encode_text(text,options={}) text = text.chomp if options[:kerning] last_subset = nil kern(text).inject([]) do |result, element| if element.is_a?(Numeric) result.last[1] = [result.last[1]] unless result.last[1].is_a?(Array) result.last[1] << element result else encoded = @subsets.encode(element) if encoded.first[0] == last_subset result.last[1] << encoded.first[1] encoded.shift end if encoded.any? last_subset = encoded.last[0] result + encoded else result end end end else @subsets.encode(text.unpack("U*")) end end def basename @basename ||= @ttf.name.postscript_name end # not sure how to compute this for true-type fonts... def stemV 0 end def italic_angle @italic_angle ||= if @ttf.postscript.exists? raw = @ttf.postscript.italic_angle hi, low = raw >> 16, raw & 0xFF hi = -((hi ^ 0xFFFF) + 1) if hi & 0x8000 != 0 "#{hi}.#{low}".to_f else 0 end end def cap_height @cap_height ||= begin height = @ttf.os2.exists? && @ttf.os2.cap_height || 0 height == 0 ? ascender : height end end def x_height # FIXME: seems like if os2 table doesn't exist, we could # just find the height of the lower-case 'x' glyph? @ttf.os2.exists? && @ttf.os2.x_height || 0 end def family_class @family_class ||= (@ttf.os2.exists? && @ttf.os2.family_class || 0) >> 8 end def serif? @serif ||= [1,2,3,4,5,7].include?(family_class) end def script? @script ||= family_class == 10 end def pdf_flags @flags ||= begin flags = 0 flags |= 0x0001 if @ttf.postscript.fixed_pitch? flags |= 0x0002 if serif? flags |= 0x0008 if script? flags |= 0x0040 if italic_angle != 0 flags |= 0x0004 # assume the font contains at least some non-latin characters end end def normalize_encoding(text) if text.respond_to?(:encode!) # if we're running under a M17n aware VM, ensure the string provided is # UTF-8 (by converting it if necessary) begin text.encode!("UTF-8") rescue raise Prawn::Errors::IncompatibleStringEncoding, "Encoding " + "#{text.encoding} can not be transparently converted to UTF-8. " + "Please ensure the encoding of the string you are attempting " + "to use is set correctly" end else # on a non M17N aware VM, use unpack as a hackish way to verify the # string is valid utf-8. I thought it was better than loading iconv # though. begin text.unpack("U*") rescue raise Prawn::Errors::IncompatibleStringEncoding, "The string you " + "are attempting to render is not encoded in valid UTF-8." end end end private def cmap @cmap ||= @ttf.cmap.unicode.first or raise("no unicode cmap for font") end # +string+ must be UTF8-encoded. # # Returns an array. If an element is a numeric, it represents the # kern amount to inject at that position. Otherwise, the element # is an array of UTF-16 characters. def kern(string) a = [] string.unpack("U*").each do |r| if a.empty? a << [r] elsif (kern = kern_pairs_table[[cmap[a.last.last], cmap[r]]]) kern *= scale_factor a << -kern << [r] else a.last << r end end a end def kern_pairs_table @kerning_data ||= has_kerning_data? ? @ttf.kerning.tables.first.pairs : {} end def cid_to_gid_map max = cmap.code_map.keys.max (0..max).map { |cid| cmap[cid] }.pack("n*") end def hmtx @hmtx ||= @ttf.horizontal_metrics end def character_width_by_code(code) return 0 unless cmap[code] @char_widths[code] ||= Integer(hmtx.widths[cmap[code]] * scale_factor) end def scale_factor @scale ||= 1000.0 / @ttf.header.units_per_em end def register(subset) temp_name = @ttf.name.postscript_name @document.ref(:Type => :Font, :BaseFont => temp_name) { |ref| embed(ref, subset) } end def embed(reference, subset) font_content = @subsets[subset].encode # FIXME: we need postscript_name and glyph widths from the font # subset. Perhaps this could be done by querying the subset, # rather than by parsing the font that the subset produces? font = TTFunk::File.new(font_content) basename = font.name.postscript_name raise "Can't detect a postscript name for #{file}" if basename.nil? compressed_font = Zlib::Deflate.deflate(font_content) fontfile = @document.ref(:Length => compressed_font.size, :Length1 => font_content.size, :Filter => :FlateDecode ) fontfile << compressed_font descriptor = @document.ref(:Type => :FontDescriptor, :FontName => basename, :FontFile2 => fontfile, :FontBBox => bbox, :Flags => pdf_flags, :StemV => stemV, :ItalicAngle => italic_angle, :Ascent => ascender, :Descent => descender, :CapHeight => cap_height, :XHeight => x_height) hmtx = font.horizontal_metrics widths = font.cmap.tables.first.code_map.map { |gid| Integer(hmtx.widths[gid] * scale_factor) } reference.data.update(:Subtype => :TrueType, :BaseFont => basename, :FontDescriptor => descriptor, :FirstChar => 0, :LastChar => 255, :Widths => @document.ref(widths)) if @subsets[subset].unicode? map = @subsets[subset].to_unicode_map ranges = [[]] lines = map.keys.sort.inject("") do |s, code| ranges << [] if ranges.last.length >= 100 unicode = map[code] ranges.last << "<%02x><%04x>" % [code, unicode] end range_blocks = ranges.inject("") do |s, list| s << "%d beginbfchar\n%s\nendbfchar\n" % [list.length, list.join("\n")] end to_unicode_cmap = UNICODE_CMAP_TEMPLATE % range_blocks.strip cmap = @document.ref({}) cmap << to_unicode_cmap cmap.compress_stream @references[subset].data[:ToUnicode] = cmap else @references[subset].data[:Encoding] = :MacRomanEncoding end end UNICODE_CMAP_TEMPLATE = <<-STR.strip.gsub(/^\s*/, "") /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo << /Registry (Adobe) /Ordering (UCS) /Supplement 0 >> def /CMapName /Adobe-Identity-UCS def /CMapType 2 def 1 begincodespacerange <00> endcodespacerange %s endcmap CMapName currentdict /CMap defineresource pop end end STR def read_ttf_file TTFunk::File.open(@name) end end end end