Sha256: 722cf16024e1cd9dcf8988b18c1e47c57d32e2cb17af28bb827af52b51a53c7d
Contents?: true
Size: 1.77 KB
Versions: 2
Compression:
Stored size: 1.77 KB
Contents
# encoding: utf-8

raise "You need to run JRuby to use Rika" unless RUBY_PLATFORM =~ /java/

require "rika/version"
require 'uri'
require 'net/http'
require 'java'

# Load every jar found in the dependency directory next to this library.
Dir[File.join(File.dirname(__FILE__), "../target/dependency/*.jar")].each do |jar|
  require jar
end

# Heavily based on the Apache Tika API: http://tika.apache.org/1.2/api/org/apache/tika/Tika.html
module Rika
  import org.apache.tika.metadata.Metadata
  import org.apache.tika.Tika
  import java.io.FileInputStream
  import java.net.URL

  # Extracts text content, metadata and the media type of a document through
  # the Apache Tika facade. The document is either a local file or a resource
  # served over plain HTTP.
  class Parser
    # @param file_location [String] path of a local file, or an "http" URL
    # @param max_content_length [Integer] forwarded to Tika's
    #   set_max_string_length (-1 is presumably Tika's "no limit" value;
    #   confirm against the Tika API documentation)
    # @raise [IOError] when file_location is neither an existing
    #   non-directory file nor an "http" URL answering with a success status
    def initialize(file_location, max_content_length = -1)
      @uri = file_location
      @tika = Tika.new
      @tika.set_max_string_length(max_content_length)
      @metadata = Metadata.new

      # File.exist? instead of the deprecated File.exists? alias, which newer
      # Ruby versions no longer provide.
      @is_file = File.exist?(@uri) && !File.directory?(@uri)

      # The HTTP probe only runs when the location is not a local file.
      return if @is_file || reachable_over_http?

      raise IOError, "File does not exist or can't be reached."
    end

    # @return [String] the extracted text, stripped of surrounding whitespace
    def content
      parse
      @content
    end

    # @return [Hash] every metadata name mapped to the value reported by
    #   Metadata#get for that name
    def metadata
      parse
      @metadata.names.to_a.each_with_object({}) do |name, values|
        values[name] = @metadata.get(name)
      end
    end

    # Detects the media type once and caches the result.
    #
    # @return [String] the media type reported by Tika
    def media_type
      @media_type ||= begin
        stream = input_stream
        begin
          @tika.detect(stream)
        ensure
          # Per the Tika API docs, detect leaves the stream open, so it is
          # closed here to avoid leaking a file handle or connection.
          stream.close
        end
      end
    end

    # @return [Array<String>] the names of all metadata fields found
    def available_metadata
      parse
      @metadata.names.to_a
    end

    # @param name [String] metadata field name
    # @return [Boolean] true when the document has a value for +name+
    def metadata_exists?(name)
      parse
      !@metadata.get(name).nil?
    end

    protected

    # Parses the document once; the text is cached in @content and the
    # metadata is collected into @metadata as part of the same call.
    # NOTE(review): Tika's parseToString is documented as closing the stream
    # it is given, so no explicit close is done here - confirm.
    def parse
      @content ||= @tika.parse_to_string(input_stream, @metadata).to_s.strip
    end

    # Opens a fresh Java input stream on the document. The caller owns it.
    def input_stream
      if @is_file
        FileInputStream.new(java.io.File.new(@uri))
      else
        URL.new(@uri).open_stream
      end
    end

    # True when the location is an "http" URL whose GET response is a
    # Net::HTTPSuccess. Errors raised by URI() or Net::HTTP propagate.
    def reachable_over_http?
      location = URI(@uri)
      location.scheme == "http" && Net::HTTP.get_response(location).is_a?(Net::HTTPSuccess)
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
rika-0.9.7-java | lib/rika.rb |
rika-0.9.6-java | lib/rika.rb |