lib/docsplit/info_extractor.rb in docsplit-0.6.4 vs lib/docsplit/info_extractor.rb in docsplit-0.7.0

- old
+ new

@@ -15,18 +15,36 @@
       :length => /^Pages:\s+([^\n]+)/,
     }
 
     # Pull out a single datum from a pdf.
     def extract(key, pdfs, opts)
+      extract_all(pdfs, opts)[key]
+    end
+
+    def extract_all(pdfs, opts)
       pdf = [pdfs].flatten.first
       cmd = "pdfinfo #{ESCAPE[pdf]} 2>&1"
       result = `#{cmd}`.chomp
       raise ExtractionFailed, result if $? != 0
-      match = result.match(MATCHERS[key])
-      answer = match && match[1]
-      answer = answer.to_i if answer && key == :length
-      answer
+      # ruby 1.8 (iconv) and 1.9 (String#encode) :
+      if String.method_defined?(:encode)
+        result.encode!('UTF-8', 'UTF-8', :invalid => :replace)
+      else
+        require 'iconv' unless defined?(Iconv)
+        ic = Iconv.new('UTF-8//IGNORE','UTF-8')
+        result = ic.iconv(result)
+      end
+      info = {}
+      MATCHERS.each do |key, matcher|
+        match = result.match(matcher)
+        answer = match && match[1]
+        if answer
+          answer = answer.to_i if key == :length
+          info[key] = answer
+        end
+      end
+      info
     end
 
   end
 
-end
\ No newline at end of file
+end