extract_images.rb in pdf-reader-1.0.0.rc1

- old
+ new

@@ -1,5 +1,6 @@
+#!/usr/bin/env ruby
 # coding: utf-8
 
 # This demonstrates a way to extract some images (those based on the JPG or
 # TIFF formats) from a PDF. There are other ways to store images, so
 # it may need to be expanded for real world usage, but it should serve
@@ -12,43 +13,37 @@
 module ExtractImages
 
   class Extractor
 
     def page(page)
-      count = 0
-
-      process_resources(page, page.resources, count)
+      process_page(page, 0)
     end
 
     private
 
     def complete_refs
       @complete_refs ||= {}
     end
 
-    def process_resources(page, resources, count)
-      xobjects = resources[:XObject]
-      return count if xobjects.nil?
+    def process_page(page, count)
+      xobjects = page.xobjects
+      return count if xobjects.empty?
 
       xobjects.each do |name, stream|
-        next if complete_refs[stream]
-        complete_refs[stream] = true
-
-        stream = page.objects.deref(stream)
-
-        if stream.hash[:Subtype] == :Image
+        case stream.hash[:Subtype]
+        when :Image then
           count += 1
 
           case stream.hash[:Filter]
           when :CCITTFaxDecode then
             ExtractImages::Tiff.new(stream).save("#{page.number}-#{count}-#{name}.tif")
           when :DCTDecode      then
             ExtractImages::Jpg.new(stream).save("#{page.number}-#{count}-#{name}.jpg")
           else
             ExtractImages::Raw.new(stream).save("#{page.number}-#{count}-#{name}.tif")
           end
-        elsif stream.hash[:Subtype] == :Form
-          count = process_resources(page, PDF::Reader::FormXObject.new(page, stream).resources, count)
+        when :Form then
+          count = process_page(PDF::Reader::FormXObject.new(page, stream), count)
         end
       end
       count
     end