lib/libreconv.rb in libreconv-0.9.4 vs lib/libreconv.rb in libreconv-0.9.5
- old
+ new
@@ -5,140 +5,177 @@
require 'net/http'
require 'tmpdir'
require 'securerandom'
require 'open3'
+# Convert office documents using LibreOffice / OpenOffice to one of their supported formats.
module Libreconv
# Raised when the soffice run fails or does not produce the expected output file.
class ConversionFailedError < StandardError; end
- SOURCE_TYPES = {
- file: 1,
- url: 2
- }.freeze
-
+ # @param [String] source Path or URL of the source file.
+ # @param [String] target Target file path.
+ # @param [String] soffice_command Path to the soffice binary.
+ # @param [String] convert_to Format to convert to (default: 'pdf').
+ # @raise [IOError] If the source is not a valid file or URL, or the soffice command is not found.
+ # @raise [URI::Error] If the source URL cannot be parsed.
+ # @raise [Net::ProtocolError] If checking the source URL fails.
+ # @raise [ConversionFailedError] If executing the soffice command fails.
def self.convert(source, target, soffice_command = nil, convert_to = nil)
  # One-shot helper: build a Converter for this request and hand back
  # whatever its #convert call returns.
  converter = Converter.new(source, target, soffice_command, convert_to)
  converter.convert
end
class Converter
+ # @return [String]
attr_accessor :soffice_command
+ # Validates the source and resolves the soffice binary; no conversion is performed here.
+ # @param [String] source Path or HTTP(S) URL of the source file.
+ # @param [String] target Target file path.
+ # @param [String, nil] soffice_command Path to the soffice binary; when nil, `soffice` then `soffice.bin` are searched on PATH.
+ # @param [String, nil] convert_to Format to convert to (default: 'pdf').
+ # @raise [IOError] If the source is neither a file nor a valid URL, or no soffice executable is found.
+ # @raise [URI::Error, Net::ProtocolError] If the source URL cannot be parsed or checking it fails.
def initialize(source, target, soffice_command = nil, convert_to = nil)
- @source = source
+ @source = check_source_type(source) # String path for an existing file, URI for a validated URL
@target = target
- @soffice_command =
- soffice_command ||
- which('soffice') ||
- which('soffice.bin')
+ @soffice_command = soffice_command || which('soffice') || which('soffice.bin') # explicit path wins over PATH lookup
@convert_to = convert_to || 'pdf'
- @source_type = check_source_type
- # If the URL contains GET params, the '&' could break when
- # being used as an argument to soffice. Wrap it in single
- # quotes to escape it. Then strip them from the target
- # temp file name.
- @escaped_source =
- if @source_type == 1
- @source
- else
- "'#{@source}'"
- end
- @escaped_source_path =
- if @source_type == 1
- @source
- else
- URI.parse(@source).path
- end
-
ensure_soffice_exists
end
+ # Converts the source into @target by running soffice headless. @raise [ConversionFailedError] If the soffice run fails.
def convert
- pipe_name = 'soffice-pipe-' + SecureRandom.uuid.to_s
- pipe_path = File.join Dir.tmpdir, pipe_name
+ tmp_pipe_path = File.join(Dir.tmpdir, "soffice-pipe-#{SecureRandom.uuid}") # unique per run; names the pipe and the UserInstallation dir
Dir.mktmpdir do |target_path|
- accept_args = [
- 'pipe,name=' + pipe_name,
- 'url',
- 'StarOffice.ServiceManager'
- ].join(';')
+ command = build_command(tmp_pipe_path, target_path)
+ target_tmp_file = execute_command(command, target_path)
- command = [
- soffice_command,
- "--accept=\"#{accept_args}\"",
- '-env:UserInstallation=file:///' +
- pipe_path.gsub('\\', '/').gsub(%r{^/}, ''),
- '--headless',
- '--convert-to',
- @convert_to,
- @escaped_source,
- '--outdir',
- target_path
- ]
+ FileUtils.cp target_tmp_file, @target # copy the result out before the temporary output directory goes away
+ end
+ ensure
+ FileUtils.rm_rf tmp_pipe_path if File.exist?(tmp_pipe_path) # clean up even when the conversion raised
+ end
- output, error, status = Open3.capture3(
- {
- 'HOME' => ENV['HOME'],
- 'PATH' => ENV['PATH'],
- 'LANG' => ENV['LANG'],
- 'LD_LIBRARY_PATH' => ENV['LD_LIBRARY_PATH'],
- 'SYSTEMROOT' => ENV['SYSTEMROOT'],
- 'TEMP' => ENV['TEMP']
- },
- *command,
- unsetenv_others: true
- )
+ private
- FileUtils.rm_rf pipe_path if File.exist?(pipe_path)
- unless status.success?
- raise ConversionFailedError,
- 'Conversion failed! Output: ' + output.strip.inspect +
- ', Error: ' + error.strip.inspect
+ # @param [Array<String>] command Argument vector for soffice, executable first (see build_command).
+ # @param [String] target_path Directory in which the converted file is expected to appear.
+ # @return [String] Path of the converted file inside target_path.
+ # @raise [ConversionFailedError] If soffice exits unsuccessfully or the expected output file does not exist.
+ def execute_command(command, target_path) # runs soffice and locates its output file
+ output, error, status =
+ if RUBY_PLATFORM =~ /java/ # NOTE(review): the JRuby branch passes no env whitelist - presumably unsupported there; confirm
+ Open3.capture3(*command)
+ else
+ Open3.capture3(command_env, *command, unsetenv_others: true) # child sees only the variables from command_env
end
- target_tmp_file = File.join(
- target_path,
- File.basename(@escaped_source_path, '.*') + '.' +
- File.basename(@convert_to, ':*')
- )
- FileUtils.cp target_tmp_file, @target
- end
+ target_tmp_file = File.join(target_path, target_filename)
+ return target_tmp_file if status.success? && File.exist?(target_tmp_file)
+
+ raise ConversionFailedError,
+ "Conversion failed! Output: #{output.strip.inspect}, Error: #{error.strip.inspect}"
end
- private
+ # @return [Hash{String => String, nil}] Environment handed to soffice: each listed name mapped to its current ENV value (nil when unset).
+ def command_env
+ Hash[%w[HOME PATH LANG LD_LIBRARY_PATH SYSTEMROOT TEMP].map { |k| [k, ENV[k]] }]
+ end
+ # @param [String] tmp_pipe_path Per-run temp path; its basename names the pipe and the full path becomes the UserInstallation URI.
+ # @param [String] target_path Directory passed to soffice as --outdir.
+ # @return [Array<String>] soffice argument vector, executable first.
+ def build_command(tmp_pipe_path, target_path)
+ [
+ soffice_command,
+ "--accept=\"pipe,name=#{File.basename(tmp_pipe_path)};url;StarOffice.ServiceManager\"",
+ "-env:UserInstallation=#{build_file_uri(tmp_pipe_path)}",
+ '--headless',
+ '--convert-to', @convert_to,
+ escaped_source,
+ '--outdir', target_path
+ ]
+ end
+
+ # Source argument for soffice: the file path, or the URL rendered as a string. No quoting is applied (see TODO below).
+ # NOTE(review): the command is passed to Open3.capture3 as separate arguments, so presumably no shell interprets '&' in GET params - confirm before reinstating quotes.
+ # @return [String]
+ def escaped_source
+ # TODO: @source.is_a?(URI::Generic) ? "'#{@source}'" : @source
+ @source.to_s
+ end
+
+ # @return [String] Path used to derive the output file name: the URI's path component for URL sources, otherwise the source itself.
+ def escaped_source_path
+ @source.is_a?(URI::Generic) ? @source.path : @source
+ end
+
# Name of the file soffice is expected to write into the output directory:
# the source's base name with its extension removed, plus the target extension.
#
# @convert_to may be "ext" or "ext:FilterName[:options]"; only the part before
# the first ':' is the file extension. File.basename(str, ':*') does not strip
# the filter part (only the '.*' suffix is treated as a wildcard), so the
# extension is split off explicitly.
# @return [String]
def target_filename
  extension = File.basename(@convert_to.split(':', 2).first.to_s)
  "#{File.basename(escaped_source_path, '.*')}.#{extension}"
end
+
+ # @raise [IOError] If soffice headless command line tool not found.
def ensure_soffice_exists
  # A configured command that points at an existing file is all that is required.
  executable_present = soffice_command && File.exist?(soffice_command)
  raise IOError, 'Can\'t find LibreOffice or OpenOffice executable.' unless executable_present
end
+ # @param [String] cmd Executable name to look for in each PATH directory.
+ # @return [String, nil] Absolute path of the first executable match, or nil when none is found.
def which(cmd)
exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : [''] # PATHEXT extensions when set, otherwise the bare name only
+
ENV['PATH'].split(File::PATH_SEPARATOR).each do |path| # NOTE(review): raises NoMethodError if PATH is unset
exts.each do |ext|
- exe = File.join(path, cmd + ext)
+ exe = File.expand_path("#{cmd}#{ext}", path)
return exe if File.executable? exe
end
end
nil
end
- def check_source_type
- if File.exist?(@source) && !File.directory?(@source)
- return SOURCE_TYPES[:file]
+ # @param [String] source Candidate file path or URL.
+ # @return [String, URI::HTTP] The source itself for an existing non-directory file, or the URI returned by check_valid_url.
+ # @raise [IOError] If the source is a directory, or is neither an existing file nor a URL accepted by check_valid_url.
+ # @raise [URI::Error] If the source URL cannot be parsed.
+ # @raise [Net::ProtocolError] If checking the source URL fails.
+ def check_source_type(source)
+ if File.exist?(source)
+ return source unless File.directory?(source) # directories fall through to the IOError below
+ elsif (uri = check_valid_url(source))
+ return uri
end
- if URI(@source).scheme == 'http' &&
- Net::HTTP.get_response(URI(@source)).is_a?(Net::HTTPSuccess)
- return SOURCE_TYPES[:url]
+
+ raise IOError, "Source (#{source}) is neither a file nor a URL."
+ end
+
+ # @param [String] url URL to probe with an HTTP HEAD request.
+ # @return [URI::HTTP, false, nil] The URI that answered with a success status (after following redirects); false if url is not an HTTP(S) URI; nil otherwise.
+ # @raise [URI::Error] If the URL cannot be parsed.
+ # @raise [Net::ProtocolError] If checking the source URL fails.
+ def check_valid_url(url)
+ uri = URI(url)
+ return false unless uri.is_a?(URI::HTTP)
+
+ Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
+ response = http.head(uri.request_uri)
+ return check_valid_url(response['location']) if response.is_a?(Net::HTTPRedirection) # NOTE(review): recursion has no redirect limit; a relative Location yields false
+
+ return response.is_a?(Net::HTTPSuccess) ? uri : nil
end
- if URI(@source).scheme == 'https' &&
- Net::HTTP.get_response(URI(@source)).is_a?(Net::HTTPSuccess)
- return SOURCE_TYPES[:url]
- end
+ end
- raise IOError, "Source (#{@source}) is neither a file nor a URL."
+ # @param [String] path Filesystem path to express as a file URI.
+ # @return [String] 'file:///' followed by the path with separators turned into '/', leading slashes dropped and other characters escaped.
+ def build_file_uri(path)
+ separators = /[#{Regexp.quote "#{File::SEPARATOR}#{File::ALT_SEPARATOR}"}]/ # matches either platform path separator
+ unsafe = Regexp.new("[^#{URI::PATTERN::UNRESERVED}/?:]") # everything except unreserved characters, '/', '?' and ':' gets escaped
+
+ 'file:///' + URI::DEFAULT_PARSER.escape(path.gsub(separators, '/').sub(%r{^/+}, ''), unsafe)
end
end
end