examples/extract_images.rb in pdf-reader-1.0.0.beta1 vs examples/extract_images.rb in pdf-reader-1.0.0.rc1
- old
+ new
@@ -1,5 +1,6 @@
+#!/usr/bin/env ruby
# coding: utf-8
# This demonstrates a way to extract some images (those based on the JPG or
# TIFF formats) from a PDF. There are other ways to store images, so
# it may need to be expanded for real world usage, but it should serve
@@ -12,43 +13,37 @@
module ExtractImages
class Extractor
# Handles a single page: kicks off image extraction with the running image
# count starting at 0.
# Old (-) side: passes page.resources to process_resources.
# New (+) side: hands the page object itself to process_page.
def page(page)
- count = 0
-
- process_resources(page, page.resources, count)
+ process_page(page, 0)
end
private
# Returns a hash that is created on first use and memoised in @complete_refs.
# The removed (-) lines of this diff use it to remember which streams were
# already handled; no added (+) line visible in this diff reads it.
def complete_refs
@complete_refs ||= {}
end
# Walks a set of XObjects, saves every image stream to a file and recurses
# into Form XObjects. Returns the updated running image count.
#
# Old (-) side: process_resources(page, resources, count) reads
# resources[:XObject], skips streams already recorded in complete_refs and
# dereferences each one through page.objects.deref.
# New (+) side: process_page(page, count) reads page.xobjects directly and
# dispatches on the stream's :Subtype with a case statement.
- def process_resources(page, resources, count)
- xobjects = resources[:XObject]
- return count if xobjects.nil?
+ def process_page(page, count)
+ xobjects = page.xobjects
+ return count if xobjects.empty?
xobjects.each do |name, stream|
- next if complete_refs[stream]
- complete_refs[stream] = true
-
- stream = page.objects.deref(stream)
-
- if stream.hash[:Subtype] == :Image
+ case stream.hash[:Subtype]
+ when :Image then
# count is bumped per image and embedded in the output file name below.
count += 1
# The stream's :Filter entry selects the writer class and the file extension.
case stream.hash[:Filter]
when :CCITTFaxDecode then
ExtractImages::Tiff.new(stream).save("#{page.number}-#{count}-#{name}.tif")
when :DCTDecode then
ExtractImages::Jpg.new(stream).save("#{page.number}-#{count}-#{name}.jpg")
else
# Any other filter value goes through Raw, which is also saved as .tif.
ExtractImages::Raw.new(stream).save("#{page.number}-#{count}-#{name}.tif")
end
# Form XObjects are wrapped in PDF::Reader::FormXObject and processed
# recursively; the count returned by the nested call carries on.
# NOTE(review): the new (+) side has no complete_refs guard; presumably a
# form that (indirectly) contains itself could recurse without bound - confirm.
- elsif stream.hash[:Subtype] == :Form
- count = process_resources(page, PDF::Reader::FormXObject.new(page, stream).resources, count)
+ when :Form then
+ count = process_page(PDF::Reader::FormXObject.new(page, stream), count)
end
end
count
end