Sha256: f89250c299a5803080586fef01b173d2e0d1e4bf27f10f86cabd8408a503344e

Contents?: true

Size: 1.3 KB

Versions: 1

Compression:

Stored size: 1.3 KB

Contents

# Based on the rake remote task code

require 'rubygems'
require 'stringio'
require 'open4'

# Thin wrapper around the bundled Apache Tika command-line jar.
#
# An instance is bound to a single document; each +to_*+ method runs the
# Tika jar in a child process with the matching command-line option and
# returns whatever the process wrote to stdout, stripped of surrounding
# whitespace.
class RubyTikaApp

  # Base class for every error raised by this wrapper.
  class Error < RuntimeError; end

  # Raised when the Tika process wrote anything to stderr.
  class CommandFailedError < Error
    # The stripped stderr output of the failed command.
    attr_reader :status

    def initialize(status)
      @status = status
    end
  end

  # document:: the document argument handed to Tika on the command line.
  #
  # NOTE(review): +document+ is interpolated into a shell command line as-is
  # (see #run_tika). Values containing spaces or shell metacharacters must be
  # quoted by the caller, and untrusted values must not be passed at all.
  def initialize(document)
    @document = document

    java_cmd = 'java'
    java_args = '-server -Djava.awt.headless=true'
    tika_path = File.join(File.dirname(__FILE__), '..', 'ext', 'tika-app-1.2.jar')

    @tika_cmd = "#{java_cmd} #{java_args} -jar #{tika_path}"
  end

  # Runs Tika with the --xml option and returns its output.
  def to_xml
    run_tika('--xml')
  end

  # Runs Tika with the --html option and returns its output.
  def to_html
    run_tika('--html')
  end

  # Runs Tika with the --json option and returns its output.
  def to_json
    run_tika('--json')
  end

  # Runs Tika with the --text option and returns its output.
  def to_text
    run_tika('--text')
  end

  # Runs Tika with the --text-main option and returns its output.
  def to_text_main
    run_tika('--text-main')
  end

  # Runs Tika with the --metadata option and returns its output.
  def to_metadata
    run_tika('--metadata')
  end

  private

  # Executes the Tika command with +option+ against the document.
  #
  # Returns the stripped stdout of the child process.
  # Raises CommandFailedError (carrying the stripped stderr text in #status)
  # when the child wrote anything to stderr. Errors raised while spawning the
  # child propagate unchanged.
  def run_tika(option)
    final_cmd = "#{@tika_cmd} #{option} #{@document}"
    stdout_result = nil
    stderr_result = nil

    # The block form of popen4 closes the pipes and waits for the child once
    # the block returns, so nothing needs cleaning up here even if spawning
    # fails before any pipe exists.
    Open4.popen4(final_cmd) do |_pid, stdin, stdout, stderr|
      # Nothing is ever written to the child; signal EOF right away.
      stdin.close

      # Drain stderr concurrently: reading stdout to EOF first could block
      # forever if the child fills the stderr pipe in the meantime.
      stderr_reader = Thread.new { stderr.read }
      stdout_result = stdout.read.strip
      stderr_result = stderr_reader.value.strip
    end

    unless stderr_result.empty?
      raise(CommandFailedError.new(stderr_result),
            "execution failed with status #{stderr_result}: #{final_cmd}")
    end

    stdout_result
  end

end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
ruby_tika_app-0.3 lib/ruby_tika_app.rb