Sha256: 2edeac708e6ce8570b18aa62f3c5078b7962e7174b92f0d743558becc470b544
Contents?: true
Size: 1.48 KB
Versions: 1
Compression:
Stored size: 1.48 KB
Contents
# Based on the rake remote task code

require 'rubygems'
require 'stringio'
require 'open3'
require 'shellwords'

# Wrapper around the bundled Tika application jar (ext/tika-app-1.14.jar).
#
# Each +to_*+ method launches +java -jar tika-app-1.14.jar+ with the matching
# command-line flag for a single document and returns whatever the process
# wrote to stdout, with surrounding whitespace stripped.
class RubyTikaApp
  # Base class for every error raised by this library.
  class Error < RuntimeError; end

  # Raised when the Tika process could not be started, or when it wrote
  # anything to stderr other than "INFO - ..." / "WARN - ..." lines.
  class CommandFailedError < Error
    # The stripped stderr output of the process, or the operating-system
    # error message when the process could not be spawned.
    attr_reader :status

    def initialize(status)
      @status = status
    end
  end

  JAVA_CMD = 'java'
  JAVA_ARGS = %w[-server -Djava.awt.headless=true].freeze
  # Absolute path, so a later Dir.chdir cannot invalidate it.
  TIKA_JAR = File.expand_path('../ext/tika-app-1.14.jar', File.dirname(__FILE__))
  # A document that starts with an http(s) URL is handed to Tika untouched.
  URL_PATTERN = %r{\Ahttps?://\S+}

  # @param document [#to_s] an http(s) URL, or a filesystem path; anything
  #   that does not start with an http(s) URL is prefixed with "file://".
  def initialize(document)
    # Coerce first: matching a non-String with =~ is not portable across
    # Ruby versions, and the file:// branch interpolates the value anyway.
    document = document.to_s
    @document = (document =~ URL_PATTERN) ? document : "file://#{document}"

    # Kept as an argv array (not a shell string) so that no argument is ever
    # re-parsed by a shell.
    @tika_cmd = [JAVA_CMD, *JAVA_ARGS, '-jar', TIKA_JAR, '--encoding=UTF-8']
  end

  # @return [String] stdout of Tika run with +--xml+
  # @raise [CommandFailedError]
  def to_xml
    run_tika('--xml')
  end

  # @return [String] stdout of Tika run with +--html+
  # @raise [CommandFailedError]
  def to_html
    run_tika('--html')
  end

  # @return [String] stdout of Tika run with +--json+
  # @raise [CommandFailedError]
  def to_json
    run_tika('--json')
  end

  # @return [String] stdout of Tika run with +--text+
  # @raise [CommandFailedError]
  def to_text
    run_tika('--text')
  end

  # @return [String] stdout of Tika run with +--text-main+
  # @raise [CommandFailedError]
  def to_text_main
    run_tika('--text-main')
  end

  # @return [String] stdout of Tika run with +--metadata+
  # @raise [CommandFailedError]
  def to_metadata
    run_tika('--metadata')
  end

  private

  # Runs Tika with +option+ against the configured document.
  #
  # @param option [String] the Tika command-line flag, e.g. "--text"
  # @return [String] stripped stdout of the process
  # @raise [CommandFailedError] if the process cannot be spawned or reports
  #   anything on stderr besides info/warn lines
  def run_tika(option)
    argv = @tika_cmd + [option, @document]
    # Only used for error messages; the process itself is started from argv.
    printable_cmd = Shellwords.join(argv)

    begin
      # capture3 drains stdout and stderr concurrently, closes every pipe and
      # waits for the child, so there is no deadlock on a full stderr pipe
      # and no zombie process left behind.
      stdout_result, stderr_result, _status = Open3.capture3(*argv)
    rescue SystemCallError => e
      # E.g. java is not installed. Surface it through the library's own
      # error hierarchy so callers rescuing RubyTikaApp::Error still see it.
      raise(CommandFailedError.new(e.message),
            "execution failed with status #{e.message}: #{printable_cmd}")
    end

    stdout_result = stdout_result.strip
    stderr_result = stderr_result.strip

    unless strip_stderr(stderr_result).empty?
      raise(CommandFailedError.new(stderr_result),
            "execution failed with status #{stderr_result}: #{printable_cmd}")
    end

    stdout_result
  end

  # Removes "INFO - ..." and "WARN - ..." lines (case-insensitive) so that
  # only unexpected stderr output remains.
  def strip_stderr(s)
    s.gsub(/^(info|warn) - .*$/i, '').strip
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
sw_ruby_tika_app-1.14.0 | lib/ruby_tika_app.rb |