Sha256: 0344b52fecbfddc400d27d491afd2343fd7c63fc1a483bae08b146b2a188b879
Contents?: true
Size: 1.45 KB
Versions: 1
Compression:
Stored size: 1.45 KB
Contents
# encoding: utf-8

# Rika wraps the Java Apache Tika library, so it can only run on JRuby.
raise "You need to run JRuby to use Rika" unless RUBY_PLATFORM =~ /java/

require "rika/version"
require 'uri'
require 'open-uri'
require_relative 'rika/parser'
require_relative '../java-lib/tika-app-1.24.1.jar'

# Convenience entry points for extracting text content and/or metadata from a
# document. Every method delegates to Rika::Parser, which is defined in
# rika/parser.rb.
module Rika
  # `java_import` is used rather than the bare `import` alias because JRuby
  # recommends it: `import` can clash with other libraries (e.g. Rake).
  # The classes are imported into the Rika namespace; some are not referenced
  # in this file but may be used by other files in the Rika module.
  java_import org.apache.tika.metadata.Metadata
  java_import org.apache.tika.Tika
  java_import org.apache.tika.language.LanguageIdentifier
  java_import org.apache.tika.detect.DefaultDetector
  java_import java.io.FileInputStream
  java_import java.net.URL

  # Parses the document once and returns both its content and its metadata.
  #
  # @param file_location the document to parse; passed unchanged to Parser.new
  #   (presumably a file path or URL -- confirm against Rika::Parser)
  # @param max_content_length passed unchanged to Parser.new; the default of
  #   -1 presumably means "no limit" -- confirm against Rika::Parser
  # @return [Array] a two-element array: [content, metadata]
  def self.parse_content_and_metadata(file_location, max_content_length = -1)
    parser = Parser.new(file_location, max_content_length)
    [parser.content, parser.metadata]
  end

  # Same as parse_content_and_metadata, but returns the two results in a hash.
  #
  # @param file_location see parse_content_and_metadata
  # @param max_content_length see parse_content_and_metadata
  # @return [Hash] a hash with the keys :content and :metadata
  def self.parse_content_and_metadata_as_hash(file_location, max_content_length = -1)
    content, metadata = parse_content_and_metadata(file_location, max_content_length)
    { content: content, metadata: metadata }
  end

  # Parses the document and returns only its content.
  #
  # @param file_location the document to parse; passed unchanged to Parser.new
  # @param max_content_length passed unchanged to Parser.new (default -1)
  # @return the value of Parser#content
  def self.parse_content(file_location, max_content_length = -1)
    Parser.new(file_location, max_content_length).content
  end

  # Parses the document and returns only its metadata.
  #
  # Regarding max_content_length, the default is set at 0 to save unnecessary
  # processing, since the content is being ignored. However, the PDF metadata
  # "pdf:unmappedUnicodeCharsPerPage" and "pdf:charsPerPage" will be absent if
  # max_content_length is 0, and may differ depending on the number of
  # characters read.
  #
  # @param file_location the document to parse; passed unchanged to Parser.new
  # @param max_content_length passed unchanged to Parser.new (default 0)
  # @return the value of Parser#metadata
  def self.parse_metadata(file_location, max_content_length = 0)
    Parser.new(file_location, max_content_length).metadata
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
rika-1.11.1-java | lib/rika.rb |