lib/docsplit.rb in docsplit-0.6.3 vs lib/docsplit.rb in docsplit-0.6.4
- old
+ new
@@ -1,15 +1,22 @@
+require 'tmpdir'
+require 'fileutils'
+require 'shellwords'
+
# The Docsplit module delegates to the Java PDF extractors.
module Docsplit
- VERSION = '0.6.3' # Keep in sync with gemspec.
+ VERSION = '0.6.4' # Keep in sync with gemspec.
+ ESCAPE = lambda {|x| Shellwords.shellescape(x) }
+
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
+ ESCAPED_ROOT = ESCAPE[ROOT]
- CLASSPATH = "#{ROOT}/build#{File::PATH_SEPARATOR}#{ROOT}/vendor/'*'"
+ CLASSPATH = "#{ESCAPED_ROOT}/build#{File::PATH_SEPARATOR}#{ESCAPED_ROOT}/vendor/'*'"
- LOGGING = "-Djava.util.logging.config.file=#{ROOT}/vendor/logging.properties"
+ LOGGING = "-Djava.util.logging.config.file=#{ESCAPED_ROOT}/vendor/logging.properties"
HEADLESS = "-Djava.awt.headless=true"
# Probe the two known install directories for an office suite. OpenOffice is
# checked first, so it wins when both directories are present; `office` is
# left nil by these lines when neither exists.
# `File.exist?` is used instead of `File.exists?`: the plural form was
# deprecated and then removed in Ruby 3.2, where it raises NoMethodError.
office ||= "/usr/lib/openoffice" if File.exist? '/usr/lib/openoffice'
office ||= "/usr/lib/libreoffice" if File.exist? '/usr/lib/libreoffice'
@@ -18,14 +25,12 @@
# Symbolic names of the document metadata fields.
# NOTE(review): presumably the keys the info extractor can report -- confirm
# against lib/docsplit/info_extractor.
METADATA_KEYS = [:author, :date, :creator, :keywords, :producer, :subject, :title, :length]
# MIME types compared against the output of `file -b --mime`; a document whose
# type is listed here is converted to PDF with `gm convert` rather than through
# the JODConverter jar.
GM_FORMATS = ["image/gif", "image/jpeg", "image/png", "image/x-ms-bmp", "image/svg+xml", "image/tiff", "image/x-portable-bitmap", "application/postscript", "image/x-portable-pixmap"]
- DEPENDENCIES = {:java => false, :gm => false, :pdftotext => false, :pdftk => false, :tesseract => false}
+ DEPENDENCIES = {:java => false, :gm => false, :pdftotext => false, :pdftk => false, :pdftailor => false, :tesseract => false}
- ESCAPE = lambda {|x| Shellwords.shellescape(x) }
-
# Check for all dependencies, and note their absence.
dirs = ENV['PATH'].split(File::PATH_SEPARATOR)
DEPENDENCIES.each_key do |dep|
dirs.each do |dir|
if File.executable?(File.join(dir, dep.to_s))
@@ -69,11 +74,11 @@
escaped_doc, escaped_out, escaped_basename = [doc, out, basename].map(&ESCAPE)
if GM_FORMATS.include?(`file -b --mime #{ESCAPE[doc]}`.strip.split(/[:;]\s+/)[0])
`gm convert #{escaped_doc} #{escaped_out}/#{escaped_basename}.pdf`
else
- options = "-jar #{ROOT}/vendor/jodconverter/jodconverter-core-3.0-beta-4.jar -r #{ROOT}/vendor/conf/document-formats.js"
+ options = "-jar #{ESCAPED_ROOT}/vendor/jodconverter/jodconverter-core-3.0-beta-4.jar -r #{ESCAPED_ROOT}/vendor/conf/document-formats.js"
run "#{options} #{escaped_doc} #{escaped_out}/#{escaped_basename}.pdf", [], {}
end
end
end
@@ -115,12 +120,9 @@
end
end
end
-require 'tmpdir'
-require 'fileutils'
-require 'shellwords'
require "#{Docsplit::ROOT}/lib/docsplit/image_extractor"
require "#{Docsplit::ROOT}/lib/docsplit/transparent_pdfs"
require "#{Docsplit::ROOT}/lib/docsplit/text_extractor"
require "#{Docsplit::ROOT}/lib/docsplit/page_extractor"
require "#{Docsplit::ROOT}/lib/docsplit/info_extractor"