Sha256: 17912dbdd04538a5fc746838acabc4502c9bd55281ac4adc13157b6f1d222a10

Contents?: true

Size: 1.78 KB

Versions: 4

Compression:

Stored size: 1.78 KB

Contents

# encoding: UTF-8
#
# Given a single image, return all text in that image.
#
# For background on using ImageMagick pre-processing with Tesseract OCR, see:
# http://misteroleg.wordpress.com/2012/12/19/ocr-using-tesseract-and-imagemagick-as-pre-processing-task/
#
require 'rtesseract' 

# Parser that extracts text from an image by running it through Tesseract
# (via the RTesseract gem).
class Sqed::Parser::OcrParser < Sqed::Parser
  # Only the writer is generated here: the reader is the OCR-running #text
  # method below. (attr_accessor would generate a reader that is immediately
  # redefined, which is dead code and a "method redefined" warning under -w.)
  attr_writer :text

  # Runs OCR over @image and returns the recognized text.
  #
  # Each call builds a new RTesseract instance and re-runs recognition; the
  # result is also stored in @text, so a value assigned through #text= is
  # overwritten by the next call to this method.
  #
  # NOTE(review): @image is not assigned in this class -- presumably it is set
  # by Sqed::Parser; confirm against the superclass.
  #
  # @return [String] the text RTesseract reports for the image
  def text
    img = @image # .white_threshold(245)

    # @jrflood: this is where you will have to do some research, tuning images so that they can be better ocr-ed,
    # all of these methods are from RMagick.
    #
    # Pre-processing experiments, kept (disabled) for reference:
    #
    # get potential border pixel color (based on quadrant?)
    # new_color = img.pixel_color(1, 1)
    # img = img.scale(2)
    # img.write('foo0.jpg.jpg')
    # img = img.enhance
    # img = img.enhance
    # img = img.enhance
    # img = img.enhance
    # img.write('foo1.jpg')
    # img = img.quantize(8, Magick::GRAYColorspace)
    # img.write('foo1.jpg')
    # img = img.sharpen(1.0, 0.2)
    # img.write('foo2.jpg')
    # border_color = img.pixel_color(img.columns - 1, img.rows - 1)
    # img = img.color_floodfill(img.columns - 1, img.rows - 1, new_color)
    # img.write('tmp/foo4.jpg')
    # img = img.quantize(2, Magick::GRAYColorspace)
    # #img = img.threshold(0.5)
    # img.write('foo4.jpg') # for debugging purposes, this is the image that is sent to OCR
    # img = img.equalize #(32, Magick::GRAYColorspace)
    # img.write('foo5.jpg') # for debugging purposes, this is the image that is sent to OCR
    # #img.write('foo3.jpg') # for debugging purposes, this is the image that is sent to OCR
    #
    # img.write('foo.jpg') # for debugging purposes, this is the image that is sent to OCR
    # img = img.white_threshold(245)

    # English language data; psm 3 is Tesseract's fully automatic page
    # segmentation mode (without orientation/script detection).
    @text = RTesseract.new(img, lang: 'eng', psm: 3).to_s
  end

  # Need to provide tuning methods here, i.e. image transformations that facilitate OCR
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
sqed-0.0.4 lib/sqed/parser/ocr_parser.rb
sqed-0.0.3 lib/sqed/parser/ocr_parser.rb
sqed-0.0.2 lib/sqed/parser/ocr_parser.rb
sqed-0.0.1 lib/sqed/parser/ocr_parser.rb