examples/extract_images.rb in pdf-reader-1.0.0.beta1 vs examples/extract_images.rb in pdf-reader-1.0.0.rc1

- old
+ new

@@ -1,5 +1,6 @@
+#!/usr/bin/env ruby
 # coding: utf-8

 # This demonstrates a way to extract some images (those based on the JPG or
 # TIFF formats) from a PDF. There are other ways to store images, so
 # it may need to be expanded for real world usage, but it should serve
@@ -12,43 +13,37 @@
 module ExtractImages

   class Extractor

     def page(page)
-      count = 0
-
-      process_resources(page, page.resources, count)
+      process_page(page, 0)
     end

     private

     def complete_refs
       @complete_refs ||= {}
     end

-    def process_resources(page, resources, count)
-      xobjects = resources[:XObject]
-      return count if xobjects.nil?
+    def process_page(page, count)
+      xobjects = page.xobjects
+      return count if xobjects.empty?

       xobjects.each do |name, stream|
-        next if complete_refs[stream]
-        complete_refs[stream] = true
-
-        stream = page.objects.deref(stream)
-
-        if stream.hash[:Subtype] == :Image
+        case stream.hash[:Subtype]
+        when :Image then
           count += 1

           case stream.hash[:Filter]
           when :CCITTFaxDecode then
             ExtractImages::Tiff.new(stream).save("#{page.number}-#{count}-#{name}.tif")
           when :DCTDecode then
             ExtractImages::Jpg.new(stream).save("#{page.number}-#{count}-#{name}.jpg")
           else
             ExtractImages::Raw.new(stream).save("#{page.number}-#{count}-#{name}.tif")
           end
-        elsif stream.hash[:Subtype] == :Form
-          count = process_resources(page, PDF::Reader::FormXObject.new(page, stream).resources, count)
+        when :Form then
+          count = process_page(PDF::Reader::FormXObject.new(page, stream), count)
         end
       end
       count
     end