ocr_parser.rb in sqed-0.5.6

- old
+ new

@@ -108,49 +108,49 @@
 
   # @return [String]
   #   the ocr text
   # TODO: very kludgy
   def get_text(section_type: :default)
-    img = image.dup
+    img = image
 
     # Resample to 300dpi when the image has fewer than 144000 pixels (intended to catch a 4"x4" image below 300dpi)
     if img.columns * img.rows < 144000
       img = img.resample(300)
     end
 
     params = SECTION_PARAMS[:default].dup
     params.merge!(SECTION_PARAMS[section_type])
 
     # May be able to overcome this hacky kludge mess by providing `processor:` to new
-    file = Tempfile.new('foo1', encoding: 'ascii-8bit')
+    file = Tempfile.new('foo1', encoding: 'utf-8')
     begin
-      file.write(image.to_blob)
+      file.write(image.to_blob.force_encoding('utf-8'))
       file.rewind
       @extracted_text = RTesseract.new(file.path, params).to_s&.strip
       file.close
     ensure
       file.close
       file.unlink   # deletes the temp file
     end
 
     if @extracted_text == ''
-      file = Tempfile.new('foo2')
+      file = Tempfile.new('foo2', encoding: 'utf-8')
       begin
-        file.write(img.dup.white_threshold(245).to_blob)
+        file.write(img.dup.white_threshold(245).to_blob.force_encoding('utf-8'))
         file.rewind
         @extracted_text = RTesseract.new(file.path, params).to_s&.strip
         file.close
       ensure
         file.close
         file.unlink
       end
     end
 
     if @extracted_text == ''
-      file = Tempfile.new('foo3')
+      file = Tempfile.new('foo3', encoding: 'utf-8')
       begin
-        file.write(img.dup.quantize(256, Magick::GRAYColorspace).to_blob)
+        file.write(img.dup.quantize(256, Magick::GRAYColorspace).to_blob.force_encoding('utf-8'))
         file.rewind
         @extracted_text = RTesseract.new(file.path, params).to_s&.strip
         file.close
       ensure
         file.close
@@ -160,5 +160,6 @@
 
     @extracted_text
   end
 
 end
+