lib/docsplit.rb in docsplit-0.6.0 vs lib/docsplit.rb in docsplit-0.6.1
- old
+ new
@@ -1,23 +1,26 @@
# The Docsplit module delegates to the Java PDF extractors.
module Docsplit
- VERSION = '0.6.0' # Keep in sync with gemspec.
+ VERSION = '0.6.1' # Keep in sync with gemspec.
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
CLASSPATH = "#{ROOT}/build#{File::PATH_SEPARATOR}#{ROOT}/vendor/'*'"
LOGGING = "-Djava.util.logging.config.file=#{ROOT}/vendor/logging.properties"
HEADLESS = "-Djava.awt.headless=true"
- OFFICE = RUBY_PLATFORM.match(/darwin/i) ? '' : '-Doffice.home=/usr/lib/openoffice'
+ office ||= "/usr/lib/openoffice" if File.exist? '/usr/lib/openoffice' # File.exist?: the `exists?` alias was removed in Ruby 3.2
+ office ||= "/usr/lib/libreoffice" if File.exist? '/usr/lib/libreoffice' # fallback, only takes effect when no OpenOffice directory was found
+ OFFICE = (RUBY_PLATFORM.match(/darwin/i) || office.nil?) ? '' : "-Doffice.home=#{office}" # no flag on darwin, or when neither directory exists (avoids an empty -Doffice.home=)
+
METADATA_KEYS = [:author, :date, :creator, :keywords, :producer, :subject, :title, :length]
- GM_FORMATS = [:png, :gif, :jpg, :jpeg, :tif, :tiff, :bmp, :pnm, :ppm, :svg, :eps]
+ GM_FORMATS = ["image/gif", "image/jpeg", "image/png", "image/x-ms-bmp", "image/svg+xml", "image/tiff", "image/x-portable-bitmap", "application/postscript", "image/x-portable-pixmap"]
DEPENDENCIES = {:java => false, :gm => false, :pdftotext => false, :pdftk => false, :tesseract => false}
ESCAPE = lambda {|x| Shellwords.shellescape(x) }
@@ -63,13 +66,13 @@
[docs].flatten.each do |doc|
ext = File.extname(doc)
basename = File.basename(doc, ext)
escaped_doc, escaped_out, escaped_basename = [doc, out, basename].map(&ESCAPE)
- if ext.length > 0 && GM_FORMATS.include?(ext.sub(/^\./, '').downcase.to_sym)
+ if GM_FORMATS.include?(`file -b --mime #{escaped_doc}`.strip.split(/[:;]\s+/)[0]) # use the shell-escaped path: the raw doc breaks on spaces and allows shell injection
`gm convert #{escaped_doc} #{escaped_out}/#{escaped_basename}.pdf`
else
- options = "-jar #{ROOT}/vendor/jodconverter/jodconverter-core-3.0-beta-3.jar -r #{ROOT}/vendor/conf/document-formats.js"
+ options = "-jar #{ROOT}/vendor/jodconverter/jodconverter-core-3.0-beta-4.jar -r #{ROOT}/vendor/conf/document-formats.js"
run "#{options} #{escaped_doc} #{escaped_out}/#{escaped_basename}.pdf", [], {}
end
end
end