RubygemsResearch

Sha256: a4e172e7aa7c6efab1e3149867e378d77451d26b04f557fba0f460b62e80ebe5

Contents?: true

Size: 1.81 KB

Versions: 8

Compression:

Stored size: 1.81 KB

module OcrFile
  module OcrEngines
    # OCR engine backed by a Google Cloud Vision image annotator client.
    #
    # The module extends itself, so every public method is called directly on
    # the module, e.g. `OcrFile::OcrEngines::CloudVision.ocr_to_text(...)`.
    module CloudVision
      extend self

      DEFAULT_LANGUAGE = 'en'

      # Available Types: https://github.com/googleapis/google-cloud-ruby/blob/master/google-cloud-vision/lib/google/cloud/vision/v1/image_annotator_pb.rb
      TEXT_DETECTION = 'TEXT_DETECTION' # Used for low-quality images
      DOCUMENT_TEXT_DETECTION = 'DOCUMENT_TEXT_DETECTION' # Used for dense text documents

      # @return [String] identifier of this OCR engine
      def id
        'cloud-vision'
      end

      # Runs OCR on a file and returns the recognised text.
      #
      # @param file_path [String] path of the image handed to the annotator
      # @param options [Hash] :type_of_ocr selects the detection call
      #   (DOCUMENT_TEXT_DETECTION, anything else falls back to plain text
      #   detection); :image_annotator is the client object that performs the
      #   request and must be supplied by the caller
      # @return [String] the extracted text
      def ocr_to_text(file_path, options: { type_of_ocr: '', image_annotator: nil })
        response = detect_text(options[:type_of_ocr], file_path, options[:image_annotator])
        extract_text(response)
      end

      # Runs OCR on a file and passes the text on to the PDF engine.
      #
      # @param file_path [String] path of the image handed to the annotator
      # @param options [Hash] same keys as #ocr_to_text; the hash is also
      #   forwarded unchanged to PdfEngine.pdf_from_text
      # @return whatever OcrFile::ImageEngines::PdfEngine.pdf_from_text returns
      def ocr_to_pdf(file_path, options: { type_of_ocr: '', image_annotator: nil })
        # Forward the caller's options: passing a fresh hash with a nil
        # annotator here made every call fail inside detect_text.
        text = ocr_to_text(file_path, options: options)
        OcrFile::ImageEngines::PdfEngine.pdf_from_text(text, options)
      end

      private

      # Dispatches to the annotator call matching the requested OCR type.
      def detect_text(type_of_ocr, image_path, image_annotator)
        if type_of_ocr == DOCUMENT_TEXT_DETECTION
          image_annotator.document_text_detection(image: image_path)
        else
          image_annotator.text_detection(image: image_path)
        end
      end

      # Concatenates the description of every text annotation in the response
      # and returns the result without its final line.
      def extract_text(response)
        raw_text = response.responses.flat_map do |section|
          section.text_annotations.map(&:description)
        end.join

        lines = raw_text.split("\n")
        # Remove the last line.
        # NOTE(review): presumably that line is the run of per-word annotations
        # that follows the full-text annotation - confirm against the API
        # response shape.
        lines.pop
        lines.join("\n")
      end
    end
  end
end

Version data entries

8 entries across 8 versions & 1 rubygem

Version	Path
ocr-file-0.0.10	lib/ocr-file/ocr_engines/cloud_vision.rb
ocr-file-0.0.8	lib/ocr-file/ocr_engines/cloud_vision.rb
ocr-file-0.0.7	lib/ocr-file/ocr_engines/cloud_vision.rb
ocr-file-0.0.6	lib/ocr-file/ocr_engines/cloud_vision.rb
ocr-file-0.0.4	lib/ocr-file/ocr_engines/cloud_vision.rb
ocr-file-0.0.3	lib/ocr-file/ocr_engines/cloud_vision.rb
ocr-file-0.0.2	lib/ocr-file/ocr_engines/cloud_vision.rb
ocr-file-0.0.1	lib/ocr-file/ocr_engines/cloud_vision.rb