Sha256: 4f4b7e6b84f48af019e054421c33304ec946137f68f471b3b8d8d8647662dcb2

Contents?: true

Size: 1.75 KB

Versions: 24

Compression:

Stored size: 1.75 KB

Contents

#!/usr/bin/env ruby
# coding: utf-8

# This demonstrates a way to extract TTF fonts from a PDF. It could be expanded
# to support extra font formats if required. Be aware that many PDFs subset
# fonts before they're embedded so glyphs may be missing or re-arranged.

require 'pdf/reader'

module ExtractFonts

  # Walks the fonts attached to a PDF page and hands the extractable ones to
  # ExtractFonts::TTF. Fonts that were already handled by this instance are
  # remembered, so a font shared between pages is only processed once.
  class Extractor

    # Process every font on +page+ that this extractor has not seen yet.
    #
    # page - an object responding to #fonts (label => font mapping, may be
    #        nil or empty) and #objects (used to dereference font objects).
    #
    # Returns the number of fonts newly processed for this page. The count
    # includes fonts that turned out to be unsupported or not embedded.
    def page(page)
      fonts = page.fonts
      return 0 if fonts.nil? || fonts.empty?

      count = 0

      fonts.each do |_label, font|
        next if complete_refs[font]
        complete_refs[font] = true

        process_font(page, font)

        count += 1
      end

      count
    end

    private

    # Dispatch on the font's :Subtype. Composite (Type0) fonts are unwrapped
    # and each descendant is processed recursively; TrueType flavoured fonts
    # are saved to disk; anything else is reported on stderr.
    def process_font(page, font)
      font = page.objects.deref(font)

      case font[:Subtype]
      when :Type0
        # The descendant array may itself be stored as an indirect object,
        # so it has to be dereferenced before iterating. A missing entry is
        # treated as "no descendants" rather than crashing.
        descendants = page.objects.deref(font[:DescendantFonts]) || []
        descendants.each { |f| process_font(page, f) }
      when :TrueType, :CIDFontType2
        ExtractFonts::TTF.new(page.objects, font).save(font_filename(font))
      else
        $stderr.puts "unsupported font type #{font[:Subtype]}"
      end
    end

    # Build the output filename from the font's :BaseFont name. The name
    # comes straight from the PDF and may contain path separators, so they
    # are replaced to keep the output file inside the current directory.
    def font_filename(font)
      safe_name = font[:BaseFont].to_s.gsub(%r{[/\\]}, "_")
      "#{safe_name}.ttf"
    end

    # Memoised record of the fonts this extractor has already handled.
    def complete_refs
      @complete_refs ||= {}
    end

  end

  # Wraps a single font dictionary and knows how to write its embedded
  # TrueType program (the descriptor's :FontFile2 stream) to a file.
  class TTF

    # objects - an object responding to #deref, used to resolve references.
    # font    - the font dictionary; its :FontDescriptor entry is resolved
    #           up front.
    def initialize(objects, font)
      @objects = objects
      @font = font
      @descriptor = @objects.deref(@font[:FontDescriptor])
    end

    # Print +filename+ to stdout, then write the embedded font data to it in
    # binary mode. When the descriptor is missing or has no :FontFile2 entry
    # nothing is written and a note is printed to stderr instead.
    def save(filename)
      puts filename.to_s

      font_file = @descriptor && @descriptor[:FontFile2]
      return $stderr.puts("- TTF font not embedded") unless font_file

      stream = @objects.deref(font_file)
      File.open(filename, "wb") do |file|
        file.write stream.unfiltered_data
      end
    end
  end
end

# Demo driver: open the sample PDF that sits in spec/data next to this
# examples directory and run the extractor over its first page only.
sample_pdf = File.expand_path(File.dirname(__FILE__)) + "/../spec/data/cairo-unicode.pdf"
font_extractor = ExtractFonts::Extractor.new

PDF::Reader.open(sample_pdf) { |reader| font_extractor.page(reader.page(1)) }

Version data entries

24 entries across 23 versions & 3 rubygems

Version Path
pdf-reader-2.5.0 examples/extract_fonts.rb
pdf-reader-2.4.2 examples/extract_fonts.rb
pdf-reader-2.4.1 examples/extract_fonts.rb
pdf-reader-2.4.0 examples/extract_fonts.rb
pdf-reader-2.3.0 examples/extract_fonts.rb
pdf-reader-2.2.1 examples/extract_fonts.rb
embulk-input-druginfo_interview_form-0.1.0 vendor/bundle/ruby/2.5.0/gems/pdf-reader-2.2.0/examples/extract_fonts.rb
embulk-input-druginfo_interview_form-0.1.0 vendor/bundle/ruby/2.4.0/gems/pdf-reader-2.2.0/examples/extract_fonts.rb
pdf-reader-2.2.0 examples/extract_fonts.rb
pdf-reader-2.1.0 examples/extract_fonts.rb
pdf-reader-2.0.0 examples/extract_fonts.rb
pdf-reader-2.0.0.beta1 examples/extract_fonts.rb
pdf-reader-1.4.1 examples/extract_fonts.rb
pdf-reader-1.4.0 examples/extract_fonts.rb
panjiva-pdf-reader-1.3.0 examples/extract_fonts.rb
pdf-reader-1.3.3 examples/extract_fonts.rb
pdf-reader-1.3.2 examples/extract_fonts.rb
pdf-reader-1.3.1 examples/extract_fonts.rb
pdf-reader-1.3.0 examples/extract_fonts.rb
pdf-reader-1.2.0 examples/extract_fonts.rb