lib/sqed/parser/ocr_parser.rb in sqed-0.5.5 vs lib/sqed/parser/ocr_parser.rb in sqed-0.5.6

- old
+ new

@@ -108,49 +108,49 @@ # @return [String] # the ocr text # TODO: very kludge def get_text(section_type: :default) - img = image.dup + img = image # resample if an image 4"x4" is less than 300dpi if img.columns * img.rows < 144000 img = img.resample(300) end params = SECTION_PARAMS[:default].dup params.merge!(SECTION_PARAMS[section_type]) # May be able to overcome this hacky kludge messe with providing `processor:` to new - file = Tempfile.new('foo1', encoding: 'ascii-8bit') + file = Tempfile.new('foo1', encoding: 'utf-8') begin - file.write(image.to_blob) + file.write(image.to_blob.force_encoding('utf-8')) file.rewind @extracted_text = RTesseract.new(file.path, params).to_s&.strip file.close ensure file.close file.unlink # deletes the temp file end if @extracted_text == '' - file = Tempfile.new('foo2') + file = Tempfile.new('foo2', encoding: 'utf-8') begin - file.write(img.dup.white_threshold(245).to_blob) + file.write(img.dup.white_threshold(245).to_blob.force_encoding('utf-8')) file.rewind @extracted_text = RTesseract.new(file.path, params).to_s&.strip file.close ensure file.close file.unlink end end if @extracted_text == '' - file = Tempfile.new('foo3') + file = Tempfile.new('foo3', encoding: 'utf-8') begin - file.write(img.dup.quantize(256, Magick::GRAYColorspace).to_blob) + file.write(img.dup.quantize(256, Magick::GRAYColorspace).to_blob.force_encoding('utf-8')) file.rewind @extracted_text = RTesseract.new(file.path, params).to_s&.strip file.close ensure file.close @@ -160,5 +160,6 @@ @extracted_text end end +