Sha256: f5cfd221f69d08153809a773a5369e85df6067014ec14c373d52f3089e24fb38
Contents?: true
Size: 1.94 KB
Versions: 5
Compression:
Stored size: 1.94 KB
Contents
module Hydra::Derivatives::Processors
  # Extract the full text from the content using Solr's extract handler
  class FullText < Processor
    # Run the full text extraction and save the result
    # @return [TrueClass,FalseClass] was the process successful.
    def process
      output_file_service.call(extract, directives)
    end

    private

    ##
    # Extract full text from the content using Solr's extract handler.
    # The JSON response is parsed and the value stored under the
    # empty-string key is taken as the text, with trailing whitespace removed.
    #
    # @return [String] The extracted text
    def extract
      JSON.parse(fetch)[''].rstrip
    end

    # Send the request to the extract service and return the response body
    # if it was successful.
    # TODO: this pulls the whole file into memory. We should stream it from Fedora instead
    #
    # @return [String] the result of calling the extract service
    # @raise [RuntimeError] when the service answers with anything but HTTP 200
    def fetch
      http = Net::HTTP.new(uri.host, uri.port)
      # Net::HTTP speaks plain HTTP unless told otherwise; enable TLS when the
      # configured Solr URL is https so the request does not fail.
      http.use_ssl = uri.scheme == 'https'
      # Post to the path + query only (not the absolute URL), which is the
      # standard request target form and keeps any userinfo out of the request line.
      resp = http.post(uri.request_uri, file_content, request_headers)
      raise "Solr Extract service was unsuccessful. '#{uri}' returned code #{resp.code} for #{source_path}\n#{resp.body}" unless resp.code == '200'
      # No-op for the String returned by #file_content; kept in case
      # file_content is overridden to return an IO-like object.
      file_content.rewind if file_content.respond_to?(:rewind)
      resp.body
    end

    # Read (and memoize) the raw bytes of the source file. File.binread closes
    # the file handle and avoids any text-mode translation of binary content.
    #
    # @return [String] the bytes of the file at source_path
    def file_content
      @content ||= File.binread(source_path)
    end

    # @return [Hash] the request headers to send to the Solr extract service
    def request_headers
      { Faraday::Request::UrlEncoded::CONTENT_TYPE => mime_type.to_s,
        Faraday::Adapter::CONTENT_LENGTH => original_size.to_s }
    end

    # Mime type of the source file as reported by MimeTypeService.
    def mime_type
      Hydra::Derivatives::MimeTypeService.mime_type(source_path)
    end

    # @return [Integer] size in bytes of the file at source_path
    def original_size
      File.size(source_path)
    end

    # @return [URI] path to the extract service
    def uri
      @uri ||= URI("#{connection_url}/update/extract?extractOnly=true&wt=json&extractFormat=text")
    end

    # @return the Solr base URL read from the ActiveFedora Solr configuration
    def connection_url
      ActiveFedora.solr_config[:url]
    end
  end
end
Version data entries
5 entries across 5 versions & 1 rubygems