Sha256: a754676eb2450c69296d5a8b61212a4ae783447f728da9ace59e5e080e2f8089

Contents?: true

Size: 1.5 KB

Versions: 2

Compression:

Stored size: 1.5 KB

Contents

require 'iiif_print/text_formats_from_alto_service'

module IiifPrint
  class TextExtractionDerivativeService < BaseDerivativeService
    # @param [Hash<Symbol,Symbol>]
    #
    # The key for the hash represents the file extension.  The key's value represents the instance
    # method to call on {IiifPrint::TextExtraction::PageOCR}
    class_attribute :ocr_derivatives, default: { txt: :plain, xml: :alto, json: :word_json }
    class_attribute :alto_derivative_service_class, default: IiifPrint::TextFormatsFromALTOService
    class_attribute :page_ocr_service_class, default: IiifPrint::TextExtraction::PageOCR
    def initialize(file_set)
      super(file_set)
    end

    def create_derivatives(src)
      from_alto = alto_derivative_service_class.new(
        file_set
      )
      return from_alto.create_derivatives(src) unless from_alto.alto_path.nil?
      create_derivatives_from_ocr(src)
    end

    def create_derivatives_from_ocr(filename)
      # TODO: Do we need this source_path instance variable?
      @source_path = filename
      ocr = page_ocr_service_class.new(filename)

      ocr_derivatives.each do |extension, method_name|
        path = prepare_path(extension.to_s)
        write(content: ocr.public_send(method_name), path: path)
      end
    end

    def write(content:, path:)
      File.open(path, 'w') do |outfile|
        outfile.write(content)
      end
    end

    def cleanup_derivatives(*)
      ocr_derivatives.keys do |extension|
        super(extension.to_s)
      end
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
iiif_print-1.1.0 lib/iiif_print/text_extraction_derivative_service.rb
iiif_print-1.0.0 lib/iiif_print/text_extraction_derivative_service.rb