lib/sqed/parser/ocr_parser.rb in sqed-0.4.4 vs lib/sqed/parser/ocr_parser.rb in sqed-0.5.0

- old
+ new

@@ -1,7 +1,11 @@ require 'rtesseract' +# We use tempfile because Rtesseract doesn't work directly with ImageMagic::Image (any longer... apparently, maybe) +# https://ruby-doc.org/stdlib-2.6.1/libdoc/tempfile/rdoc/Tempfile.html +require 'tempfile' + # encoding: UTF-8 # # Given a single image return all text in that image. # # For reference @@ -113,22 +117,45 @@ end params = SECTION_PARAMS[:default].dup params.merge!(SECTION_PARAMS[section_type]) - r = RTesseract.new(img, params) - @extracted_text = r.to_s.strip + # May be able to overcome this hacky kludge messe with providing `processor:` to new + file = Tempfile.new('foo1') + begin + file.write(image.to_blob) + file.rewind + @extracted_text = RTesseract.new(file.path, params).to_s&.strip + file.close + ensure + file.close + file.unlink # deletes the temp file + end if @extracted_text == '' - img = img.white_threshold(245) - r = RTesseract.new(img, params) - @extracted_text = r.to_s.strip + file = Tempfile.new('foo2') + begin + file.write(img.dup.white_threshold(245).to_blob) + file.rewind + @extracted_text = RTesseract.new(file.path, params).to_s&.strip + file.close + ensure + file.close + file.unlink # deletes the temp file + end end if @extracted_text == '' - img = img.quantize(256,Magick::GRAYColorspace) - r = RTesseract.new(img, params) - @extracted_text = r.to_s.strip + file = Tempfile.new('foo3') + begin + file.write(img.dup.quantize(256,Magick::GRAYColorspace).to_blob) + file.rewind + @extracted_text = RTesseract.new(file.path, params).to_s&.strip + file.close + ensure + file.close + file.unlink # deletes the temp file + end end @extracted_text end