lib/sqed/parser/ocr_parser.rb in sqed-0.5.4 vs lib/sqed/parser/ocr_parser.rb in sqed-0.5.5

- old
+ new

@@ -106,23 +106,24 @@ # img # end # @return [String] # the ocr text + # TODO: very kludge def get_text(section_type: :default) - img = image + img = image.dup # resample if an image 4"x4" is less than 300dpi if img.columns * img.rows < 144000 img = img.resample(300) end params = SECTION_PARAMS[:default].dup params.merge!(SECTION_PARAMS[section_type]) # May be able to overcome this hacky kludge messe with providing `processor:` to new - file = Tempfile.new('foo1') + file = Tempfile.new('foo1', encoding: 'ascii-8bit') begin file.write(image.to_blob) file.rewind @extracted_text = RTesseract.new(file.path, params).to_s&.strip file.close @@ -138,23 +139,23 @@ file.rewind @extracted_text = RTesseract.new(file.path, params).to_s&.strip file.close ensure file.close - file.unlink # deletes the temp file + file.unlink end end if @extracted_text == '' file = Tempfile.new('foo3') begin - file.write(img.dup.quantize(256,Magick::GRAYColorspace).to_blob) + file.write(img.dup.quantize(256, Magick::GRAYColorspace).to_blob) file.rewind @extracted_text = RTesseract.new(file.path, params).to_s&.strip file.close ensure file.close - file.unlink # deletes the temp file + file.unlink end end @extracted_text end