lib/docsplit/transparent_pdfs.rb in docsplit-0.7.4 vs lib/docsplit/transparent_pdfs.rb in docsplit-0.7.5

- old
+ new

@@ -6,21 +6,24 @@
 
     # Temporarily convert any non-PDF documents to PDFs before running them
     # through further extraction.
     def ensure_pdfs(docs)
       [docs].flatten.map do |doc|
-        ext = File.extname(doc)
-        if ext.downcase == '.pdf'
+        if is_pdf?(doc)
           doc
         else
           tempdir = File.join(Dir.tmpdir, 'docsplit')
           extract_pdf([doc], {:output => tempdir})
-          File.join(tempdir, File.basename(doc, ext) + '.pdf')
+          File.join(tempdir, File.basename(doc, File.extname(doc)) + '.pdf')
         end
       end
     end
 
+    def is_pdf?(doc)
+      File.extname(doc).downcase == '.pdf' || File.open(doc, 'rb', &:readline) =~ /\A\%PDF-\d+(\.\d+)?/
+    end
+
   end
 
   extend TransparentPDFs
 
-end
\ No newline at end of file
+end