lib/libreconv.rb in libreconv-0.9.4 vs lib/libreconv.rb in libreconv-0.9.5

- old
+ new

require 'net/http'
require 'tmpdir'
require 'securerandom'
require 'open3'
require 'fileutils'
require 'uri'

# Convert office documents using LibreOffice / OpenOffice to one of their supported formats.
module Libreconv
  # Raised when the soffice process fails or does not produce the expected output file.
  class ConversionFailedError < StandardError; end

  # Convenience wrapper: builds a {Converter} and runs it.
  #
  # @param [String] source          Path or URL of the source file.
  # @param [String] target          Target file path.
  # @param [String] soffice_command Path to the soffice binary.
  # @param [String] convert_to      Format to convert to (default: 'pdf').
  # @raise [IOError]               If invalid source file/URL or soffice command not found.
  # @raise [URI::Error]            When URI parsing error.
  # @raise [Net::ProtocolError]    If source URL checking failed.
  # @raise [ConversionFailedError] When soffice command execution error.
  def self.convert(source, target, soffice_command = nil, convert_to = nil)
    Converter.new(source, target, soffice_command, convert_to).convert
  end

  # Runs a single headless soffice conversion of one source into one target file.
  class Converter
    # Environment variables forwarded to the soffice process; everything else is unset.
    ENV_KEYS = %w[HOME PATH LANG LD_LIBRARY_PATH SYSTEMROOT TEMP].freeze

    # Upper bound on HTTP redirects followed while validating a source URL.
    MAX_REDIRECTS = 5

    # Characters percent-encoded in the UserInstallation file URI: everything except
    # letters, digits, "-_.!~*'()" and the structural characters "/", "?" and ":".
    FILE_URI_UNSAFE = %r{[^\-_.!~*'()a-zA-Z\d/?:]}.freeze

    private_constant :ENV_KEYS, :MAX_REDIRECTS, :FILE_URI_UNSAFE

    # @return [String]
    attr_accessor :soffice_command

    # @param [String] source          Path or URL of the source file.
    # @param [String] target          Target file path.
    # @param [String] soffice_command Path to the soffice binary.
    # @param [String] convert_to      Format to convert to (default: 'pdf').
    # @raise [IOError]            If invalid source file/URL or soffice command not found.
    # @raise [URI::Error]         When URI parsing error.
    # @raise [Net::ProtocolError] If source URL checking failed.
    def initialize(source, target, soffice_command = nil, convert_to = nil)
      @source = check_source_type(source)
      @target = target
      @soffice_command = soffice_command || which('soffice') || which('soffice.bin')
      @convert_to = convert_to || 'pdf'

      ensure_soffice_exists
    end

    # Converts the source into a temporary directory and copies the result to the target path.
    # The per-run soffice user installation directory is always removed afterwards.
    #
    # @raise [ConversionFailedError] When soffice command execution error.
    def convert
      tmp_pipe_path = File.join(Dir.tmpdir, "soffice-pipe-#{SecureRandom.uuid}")

      Dir.mktmpdir do |target_path|
        command = build_command(tmp_pipe_path, target_path)
        target_tmp_file = execute_command(command, target_path)

        FileUtils.cp target_tmp_file, @target
      end
    ensure
      # Guard against nil so a failure while building the path is not masked by a TypeError here.
      FileUtils.rm_rf tmp_pipe_path if tmp_pipe_path && File.exist?(tmp_pipe_path)
    end

    private

    # Runs soffice and returns the path of the converted file inside +target_path+.
    #
    # @param [Array<String>] command
    # @param [String] target_path
    # @return [String]
    # @raise [ConversionFailedError] When soffice exits unsuccessfully or the output file is missing.
    def execute_command(command, target_path)
      output, error, status =
        if RUBY_PLATFORM.include?('java')
          # On JRuby the command is run without the env hash / unsetenv_others option.
          Open3.capture3(*command)
        else
          Open3.capture3(command_env, *command, unsetenv_others: true)
        end

      target_tmp_file = File.join(target_path, target_filename)
      return target_tmp_file if status.success? && File.exist?(target_tmp_file)

      raise ConversionFailedError,
            "Conversion failed! Output: #{output.strip.inspect}, Error: #{error.strip.inspect}"
    end

    # @return [Hash{String => String, nil}] Forwarded environment (nil for variables that are unset).
    def command_env
      ENV_KEYS.each_with_object({}) { |key, env| env[key] = ENV[key] }
    end

    # @param [String] tmp_pipe_path
    # @param [String] target_path
    # @return [Array<String>]
    def build_command(tmp_pipe_path, target_path)
      [
        soffice_command,
        "--accept=\"pipe,name=#{File.basename(tmp_pipe_path)};url;StarOffice.ServiceManager\"",
        "-env:UserInstallation=#{build_file_uri(tmp_pipe_path)}",
        '--headless',
        '--convert-to', @convert_to,
        escaped_source,
        '--outdir', target_path
      ]
    end

    # Source argument handed to soffice. The command is executed as an argument list,
    # so the source is passed through verbatim (URIs are stringified).
    #
    # @return [String]
    def escaped_source
      @source.to_s
    end

    # @return [String] Local path, or the path component when the source is a URL.
    def escaped_source_path
      @source.is_a?(URI::Generic) ? @source.path : @source
    end

    # Name of the file soffice is expected to write into the output directory.
    # The extension is the part of +convert_to+ before the first ':' so that filter
    # specifications such as "txt:Text (encoded):UTF8" resolve to "txt".
    #
    # @return [String]
    def target_filename
      extension = @convert_to.to_s.split(':', 2).first
      "#{File.basename(escaped_source_path, '.*')}.#{extension}"
    end

    # @raise [IOError] If soffice headless command line tool not found.
    def ensure_soffice_exists
      return if soffice_command && File.exist?(soffice_command)

      raise IOError, 'Can\'t find LibreOffice or OpenOffice executable.'
    end

    # Looks up an executable on PATH, honouring PATHEXT when it is set.
    #
    # @param [String] cmd
    # @return [String, nil]
    def which(cmd)
      exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']

      # PATH may be unset; treat that as an empty search path instead of raising.
      ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).each do |path|
        exts.each do |ext|
          exe = File.expand_path("#{cmd}#{ext}", path)
          return exe if File.executable? exe
        end
      end

      nil
    end

    # @param [String] source
    # @return [String, URI::HTTP] The file path itself, or the validated URI.
    # @raise [IOError]            If invalid source file/URL.
    # @raise [URI::Error]         When URI parsing error.
    # @raise [Net::ProtocolError] If source URL checking failed.
    def check_source_type(source)
      if File.exist?(source)
        return source unless File.directory?(source)
      elsif (uri = check_valid_url(source))
        return uri
      end

      raise IOError, "Source (#{source}) is neither a file nor a URL."
    end

    # Issues a HEAD request and follows redirects, up to +redirect_limit+ hops.
    #
    # @param [String, URI] url
    # @param [Integer] redirect_limit Remaining redirects that may be followed.
    # @return [URI::HTTP, false, nil] The reachable URI; false if not an HTTP(S) URL;
    #   nil if the response is not a success or the redirect chain cannot be followed.
    # @raise [URI::Error]         When URI parsing error.
    # @raise [Net::ProtocolError] If source URL checking failed.
    def check_valid_url(url, redirect_limit = MAX_REDIRECTS)
      uri = URI(url)
      return false unless uri.is_a?(URI::HTTP)

      response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
        http.head(uri.request_uri)
      end
      return uri if response.is_a?(Net::HTTPSuccess)

      location = response['location']
      return nil unless response.is_a?(Net::HTTPRedirection) && location
      return nil unless redirect_limit.positive? # stop on redirect loops

      # Location may be relative; resolve it against the URI that was just requested.
      check_valid_url(uri.merge(location), redirect_limit - 1)
    end

    # Builds the file:/// URI used for soffice's UserInstallation directory.
    #
    # @param [String] path
    # @return [String]
    def build_file_uri(path)
      separators = Regexp.union([File::SEPARATOR, File::ALT_SEPARATOR].compact)
      normalized = path.gsub(separators, '/').sub(%r{\A/+}, '')
      encoded = normalized.gsub(FILE_URI_UNSAFE) do |chars|
        chars.each_byte.map { |byte| format('%%%02X', byte) }.join
      end

      "file:///#{encoded}"
    end
  end
end