Sha256: 86027c588a3a184b6580802e50cdae42aca6ac30c88afc37fa9847b017a74549

Contents?: true

Size: 1.8 KB

Versions: 1

Compression:

Stored size: 1.8 KB

Contents

# Refuse to load on anything but JRuby: the rest of this file requires 'java'
# and imports Java classes.
unless RUBY_PLATFORM =~ /java/
  raise "You need to run JRuby to use Rika"
end

require "rika/version"
require 'uri'
require 'net/http'
require 'java' 

# Put every jar under ../target/dependency (relative to this file) on the
# load path by requiring it.
jar_pattern = File.join(File.dirname(__FILE__), "../target/dependency/*.jar")
Dir.glob(jar_pattern).each { |jar| require jar }

# Heavily based on the Apache Tika API: http://tika.apache.org/1.2/api/org/apache/tika/Tika.html
module Rika
  import org.apache.tika.metadata.Metadata
  import org.apache.tika.Tika
  
  # Extracts text content and metadata from a local file or an HTTP(S) URL
  # by delegating to a Tika instance.
  #
  # The document is parsed eagerly in the constructor; the accessors below
  # only read the results of that parse.
  class Parser
    # Schemes that are treated as remote documents.
    HTTP_SCHEMES = %w[http https].freeze

    # @param uri [String] path of an existing local file, or an http/https URL
    # @param max_content_length [Integer] forwarded to
    #   Tika#set_max_string_length (the Tika API documents -1 as "no limit")
    # @raise [IOError] if +uri+ is neither an existing file nor an http(s) URL,
    #   or if the URL does not answer with a 2xx response
    # @raise [URI::InvalidURIError] if +uri+ is not an existing file and
    #   cannot be parsed as a URI
    def initialize(uri, max_content_length = -1)
      @uri = uri
      @tika = Tika.new
      @tika.set_max_string_length(max_content_length)
      @metadata = Metadata.new

      # File.exist? (not the File.exists? alias, removed in Ruby 3.2).
      # A local file wins over URL interpretation, and the URI is only
      # parsed when no such file exists.
      if File.exist?(@uri)
        parse_file
      elsif HTTP_SCHEMES.include?(URI::Parser.new.parse(@uri).scheme)
        parse_url
      else
        raise IOError, "File does not exist or can't be reached."
      end
    end

    # @return [String] extracted text with surrounding whitespace stripped
    def content
      @content.to_s.strip
    end

    # @return [Hash] every metadata name mapped to the value Tika reports
    def metadata
      @metadata.names.to_a.each_with_object({}) do |name, metadata_hash|
        metadata_hash[name] = @metadata.get(name)
      end
    end

    # @return [Array] names of all metadata fields present for the document
    def available_metadata
      @metadata.names.to_a
    end

    # @param name [String] metadata field name
    # @return [Boolean] true when the field has a non-nil value
    def metadata_exists?(name)
      !@metadata.get(name).nil?
    end

    protected

    # Parses the local file at @uri, recording its base name under the
    # "filename" metadata key before handing the stream to Tika.
    def parse_file
      input_stream = java.io.FileInputStream.new(java.io.File.new(@uri))
      @metadata.set("filename", File.basename(@uri))
      @content = @tika.parse_to_string(input_stream, @metadata)
    end

    # Parses the document served at @uri, recording the URL under the "url"
    # metadata key.
    #
    # A preflight GET is issued first; any response that is not a
    # Net::HTTPSuccess (redirects included) is reported as unreachable.
    #
    # @raise [IOError] if the preflight response is not a 2xx success
    def parse_url
      response = Net::HTTP.get_response(URI(@uri))
      raise IOError, "File does not exist or can't be reached." unless response.is_a?(Net::HTTPSuccess)

      input_stream = java.net.URL.new(@uri).open_stream
      @metadata.set("url", @uri)
      @content = @tika.parse_to_string(input_stream, @metadata)
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
rika-0.9.3-java lib/rika.rb