lib/docsplit/transparent_pdfs.rb in docsplit-0.7.4 vs lib/docsplit/transparent_pdfs.rb in docsplit-0.7.5
- old
+ new
@@ -6,21 +6,24 @@
# Temporarily convert any non-PDF documents to PDFs before running them
# through further extraction.
#
# docs - a single document path or a (possibly nested) array of paths; it is
#        wrapped in an array and flattened before processing.
#
# Returns an array with one path per input: the original path for documents
# recognised as PDFs, otherwise a path named "<basename>.pdf" inside the
# shared "docsplit" directory under Dir.tmpdir.
#
# In this diff the PDF check moves from an inline extension comparison (old)
# to the is_pdf? helper (new).
def ensure_pdfs(docs)
[docs].flatten.map do |doc|
- ext = File.extname(doc)
- if ext.downcase == '.pdf'
+ if is_pdf?(doc)
# Already a PDF: hand the path back untouched.
doc
else
# Every conversion targets the same temp directory, and the result path keeps
# only the base name, so inputs from different directories that share a base
# name map to the same output path.
tempdir = File.join(Dir.tmpdir, 'docsplit')
# extract_pdf is defined outside this view; presumably it writes
# "<basename>.pdf" into :output — TODO confirm.
extract_pdf([doc], {:output => tempdir})
- File.join(tempdir, File.basename(doc, ext) + '.pdf')
+ File.join(tempdir, File.basename(doc, File.extname(doc)) + '.pdf')
end
end
end
# Tells whether doc should be treated as a PDF already.
#
# doc - path to the document.
#
# Returns true when the extension is ".pdf" (case-insensitive). Otherwise the
# first line of the file is read in binary mode and matched against the
# "%PDF-<version>" header; that branch yields the =~ result (0 on a match,
# nil otherwise), so the value is truthy/falsy rather than strictly boolean.
#
# NOTE(review): the file is only opened when the extension check fails, so a
# missing path or an empty file (IO#readline raises EOFError at end of file)
# surfaces as an exception here — confirm callers expect that.
+ def is_pdf?(doc)
+ File.extname(doc).downcase == '.pdf' || File.open(doc, 'rb', &:readline) =~ /\A\%PDF-\d+(\.\d+)?/
+ end
+
end
extend TransparentPDFs
-end
\ No newline at end of file
+end