Sha256: e7d5c468c7f72cbd8991a4052564e2fdfca18d5ad6351ed75309c133b0e3c1d8
Contents?: true
Size: 1.72 KB
Versions: 2
Compression:
Stored size: 1.72 KB
Contents
require 'iiif_print/text_formats_from_alto_service'

module IiifPrint
  # Derivative service that produces text derivatives (plain text, ALTO XML and
  # word-coordinate JSON) for a file set.
  #
  # When the ALTO derivative service reports an existing ALTO path, the
  # derivatives are produced by that service; otherwise they are produced by
  # running the configured page OCR service against the source file.
  class TextExtractionDerivativeService < BaseDerivativeService
    # @param [Hash<Symbol,Symbol>]
    #
    # The key for the hash represents the file extension.  The key's value represents the instance
    # method to call on {IiifPrint::TextExtraction::PageOCR}
    class_attribute :ocr_derivatives, default: { txt: :plain, xml: :alto, json: :word_json }

    # Class instantiated with the file set to derive formats from existing ALTO.
    class_attribute :alto_derivative_service_class, default: IiifPrint::TextFormatsFromALTOService

    # Class instantiated with the source filename to perform OCR.
    class_attribute :page_ocr_service_class, default: IiifPrint::TextExtraction::PageOCR

    # @param file_set [Object] the file set passed through to the base service
    def initialize(file_set)
      super(file_set)
    end

    # Creates the text derivatives for the given source.
    #
    # Delegates to the ALTO derivative service when it reports a non-nil
    # +alto_path+; otherwise falls back to OCR.
    #
    # @param src [String] path of the source file
    # @return [Object] whatever the chosen strategy returns
    def create_derivatives(src)
      from_alto = alto_derivative_service_class.new(file_set)
      return from_alto.create_derivatives(src) unless from_alto.alto_path.nil?

      create_derivatives_from_ocr(src)
    end

    # Runs OCR on +filename+ and writes one derivative per entry in
    # {.ocr_derivatives}.
    #
    # @param filename [String] path of the source file handed to the OCR service
    # @return [Hash<Symbol,Symbol>] the +ocr_derivatives+ hash (result of +each+)
    def create_derivatives_from_ocr(filename)
      # TODO: Do we need this source_path instance variable?
      @source_path = filename
      ocr = page_ocr_service_class.new(filename)
      ocr_derivatives.each do |extension, method_name|
        # prepare_path is not defined in this class; presumably inherited from
        # BaseDerivativeService -- confirm.
        path = prepare_path(extension.to_s)
        write(content: ocr.public_send(method_name), path: path, extension: extension)
      end
    end

    # Writes +content+ to +path+ and then hands it to
    # {IiifPrint.copy_derivatives_from_data_store}.
    #
    # @param content [String] the derivative body
    # @param path [String] destination path on disk
    # @param extension [Symbol] extension used to look up the mime type
    # @return [Object] the result of +IiifPrint.copy_derivatives_from_data_store+
    def write(content:, path:, extension:)
      # mime_type_for is not defined in this class; presumably inherited -- confirm.
      mime_type = mime_type_for(extension)

      # Write and close the file *before* invoking the persistence hook so that
      # anything reading +path+ downstream sees the complete, flushed content.
      File.write(path, content)

      IiifPrint.copy_derivatives_from_data_store(
        stream: content,
        directives: { url: path, container: 'extracted_text', mime_type: mime_type }
      )
    end

    # Removes every derivative this service can create by calling the parent
    # implementation once per configured extension.
    #
    # NOTE: this must iterate with +each_key+; +Hash#keys+ ignores a block, so
    # passing the block to +keys+ would silently skip all cleanup.
    #
    # @return [Hash<Symbol,Symbol>] the +ocr_derivatives+ hash (result of +each_key+)
    def cleanup_derivatives(*)
      ocr_derivatives.each_key do |extension|
        super(extension.to_s)
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
iiif_print-2.0.1 | lib/iiif_print/text_extraction_derivative_service.rb |
iiif_print-2.0.0 | lib/iiif_print/text_extraction_derivative_service.rb |