# frozen_string_literal: true

require 'libreconv/version'
require 'uri'
require 'net/http'
require 'tmpdir'
require 'fileutils'
require 'securerandom'
require 'open3'

# Convert office documents using LibreOffice / OpenOffice to one of their supported formats.
module Libreconv
  # Raised when the soffice command fails or does not produce the expected output file.
  class ConversionFailedError < StandardError; end

  # Convenience wrapper: builds a Converter and runs the conversion in one call.
  #
  # @param [String] source          Path or URL of the source file.
  # @param [String] target          Target file path.
  # @param [String] soffice_command Path to the soffice binary.
  # @param [String] convert_to      Format to convert to (default: 'pdf').
  # @raise [IOError]                If invalid source file/URL or soffice command not found.
  # @raise [URI::Error]             When URI parsing error.
  # @raise [Net::ProtocolError]     If source URL checking failed.
  # @raise [ConversionFailedError]  When soffice command execution error.
  def self.convert(source, target, soffice_command = nil, convert_to = nil)
    Converter.new(source, target, soffice_command, convert_to).convert
  end

  # Validates the source and the soffice binary up front, then runs soffice
  # headlessly to convert the source into the requested format.
  class Converter
    # Upper bound on HTTP redirects followed while validating a source URL,
    # so that a redirect loop cannot recurse forever.
    MAX_REDIRECTS = 10
    private_constant :MAX_REDIRECTS

    # @return [String]
    attr_accessor :soffice_command

    # @param [String] source          Path or URL of the source file.
    # @param [String] target          Target file path.
    # @param [String] soffice_command Path to the soffice binary.
    # @param [String] convert_to      Format to convert to (default: 'pdf').
    # @raise [IOError]                If invalid source file/URL or soffice command not found.
    # @raise [URI::Error]             When URI parsing error.
    # @raise [Net::ProtocolError]     If source URL checking failed.
    def initialize(source, target, soffice_command = nil, convert_to = nil)
      @source = check_source_type(source)
      @target = target
      # Fall back to looking the binary up on PATH when no explicit path is given.
      @soffice_command = soffice_command || which('soffice') || which('soffice.bin')
      @convert_to = convert_to || 'pdf'
      ensure_soffice_exists
    end

    # Runs soffice into a temporary output directory and copies the produced
    # file to the target path.
    #
    # @raise [ConversionFailedError] When soffice command execution error.
    def convert
      # Unique per-run path: its basename names the soffice pipe and the path
      # itself is passed as the UserInstallation directory (see #build_command).
      tmp_pipe_path = File.join(Dir.tmpdir, "soffice-pipe-#{SecureRandom.uuid}")

      Dir.mktmpdir do |target_path|
        command = build_command(tmp_pipe_path, target_path)
        target_tmp_file = execute_command(command, target_path)
        FileUtils.cp target_tmp_file, @target
      end
    ensure
      # tmp_pipe_path is nil if building the path itself raised.
      FileUtils.rm_rf tmp_pipe_path if tmp_pipe_path && File.exist?(tmp_pipe_path)
    end

    private

    # Executes the soffice command and returns the path of the converted file.
    #
    # @param [Array] command
    # @param [String] target_path
    # @return [String]
    # @raise [ConversionFailedError] When soffice command execution error.
    def execute_command(command, target_path)
      output, error, status =
        if RUBY_PLATFORM.include?('java')
          # On JRuby the command is run with the inherited environment.
          Open3.capture3(*command)
        else
          # Elsewhere only the whitelisted variables from #command_env are passed on.
          Open3.capture3(command_env, *command, unsetenv_others: true)
        end

      target_tmp_file = File.join(target_path, target_filename)
      return target_tmp_file if status.success? && File.exist?(target_tmp_file)

      raise ConversionFailedError,
            "Conversion failed! Output: #{output.strip.inspect}, Error: #{error.strip.inspect}"
    end

    # Environment variables handed to the soffice process; values are nil for
    # variables that are not set in the current environment.
    #
    # @return [Hash]
    def command_env
      %w[HOME PATH LANG LD_LIBRARY_PATH SYSTEMROOT TEMP].map { |key| [key, ENV[key]] }.to_h
    end

    # Builds the argument vector for the soffice invocation.
    #
    # @param [String] tmp_pipe_path
    # @param [String] target_path
    # @return [Array]
    def build_command(tmp_pipe_path, target_path)
      [
        soffice_command,
        "--accept=\"pipe,name=#{File.basename(tmp_pipe_path)};url;StarOffice.ServiceManager\"",
        "-env:UserInstallation=#{build_file_uri(tmp_pipe_path)}",
        '--headless',
        '--convert-to', @convert_to,
        escaped_source,
        '--outdir', target_path
      ]
    end

    # Source as passed to soffice on the command line.
    #
    # If the URL contains GET params, the '&' could break when being used as an argument to soffice.
    # The quoting idea below is not implemented; the source is currently passed through unchanged.
    #
    # @return [String]
    def escaped_source
      # TODO: @source.is_a?(URI::Generic) ? "'#{@source}'" : @source
      @source.to_s
    end

    # Path component of the source: the URL path for URLs, the source itself otherwise.
    #
    # @return [String]
    def escaped_source_path
      @source.is_a?(URI::Generic) ? @source.path : @source
    end

    # Name of the file soffice is expected to write into the output directory:
    # the source basename without its extension plus the target format.
    #
    # @return [String]
    def target_filename
      # NOTE(review): the ':*' suffix is presumably meant to drop an export filter
      # part such as ':writer_pdf_Export' from convert_to - confirm on all platforms.
      "#{File.basename(escaped_source_path, '.*')}.#{File.basename(@convert_to, ':*')}"
    end

    # @raise [IOError] If soffice headless command line tool not found.
    def ensure_soffice_exists
      return if soffice_command && File.exist?(soffice_command)

      raise IOError, 'Can\'t find LibreOffice or OpenOffice executable.'
    end

    # Looks up an executable on PATH, trying every PATHEXT extension when that
    # variable is set.
    #
    # @param [String] cmd
    # @return [String, nil]
    def which(cmd)
      exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']

      # PATH may be unset; treat that as an empty search path instead of crashing.
      ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).each do |dir|
        exts.each do |ext|
          exe = File.expand_path("#{cmd}#{ext}", dir)
          # File.executable? is also true for searchable directories; skip those.
          return exe if File.executable?(exe) && !File.directory?(exe)
        end
      end

      nil
    end

    # Accepts an existing non-directory path or a reachable HTTP(S) URL.
    #
    # @param [String] source
    # @return [String, URI::HTTP]
    # @raise [IOError]            If invalid source file/URL.
    # @raise [URI::Error]         When URI parsing error.
    # @raise [Net::ProtocolError] If source URL checking failed.
    def check_source_type(source)
      if File.exist?(source)
        return source unless File.directory?(source)
      elsif (uri = check_valid_url(source))
        return uri
      end

      raise IOError, "Source (#{source}) is neither a file nor a URL."
    end

    # Issues a HEAD request and follows redirects (up to MAX_REDIRECTS) to find
    # out whether the URL points at a retrievable resource.
    #
    # @param [String, URI] url
    # @param [Integer] redirects_left Remaining redirects that may be followed.
    # @return [URI::HTTP, false, nil] The final URI on success, false if the URL is
    #   not HTTP(S), nil if the request did not succeed.
    # @raise [URI::Error]         When URI parsing error.
    # @raise [Net::ProtocolError] If source URL checking failed.
    def check_valid_url(url, redirects_left = MAX_REDIRECTS)
      uri = URI(url)
      return false unless uri.is_a?(URI::HTTP)

      response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
        http.head(uri.request_uri)
      end
      return uri if response.is_a?(Net::HTTPSuccess)
      return nil unless response.is_a?(Net::HTTPRedirection)

      location = response['location']
      return nil if location.nil? || redirects_left <= 0

      # Location may be relative; resolve it against the URL that was requested.
      check_valid_url(URI.join(uri.to_s, location), redirects_left - 1)
    end

    # Turns a file system path into a file:/// URI with unsafe characters escaped.
    #
    # @param [String] path
    # @return [String]
    def build_file_uri(path)
      # File::ALT_SEPARATOR is nil on platforms without one and interpolates to ''.
      separators = /[#{Regexp.quote "#{File::SEPARATOR}#{File::ALT_SEPARATOR}"}]/
      unsafe = Regexp.new("[^#{URI::PATTERN::UNRESERVED}/?:]")

      normalized = path.gsub(separators, '/').sub(%r{\A/+}, '')
      "file:///#{URI::DEFAULT_PARSER.escape(normalized, unsafe)}"
    end
  end
end