lib/sqed/parser/ocr_parser.rb in sqed-0.5.5 vs lib/sqed/parser/ocr_parser.rb in sqed-0.5.6
- old
+ new
@@ -108,49 +108,49 @@
# @return [String]
# the ocr text
# TODO: very kludgy
def get_text(section_type: :default)
- img = image.dup
+ img = image
# resample if an image 4"x4" is less than 300dpi
if img.columns * img.rows < 144000
img = img.resample(300)
end
params = SECTION_PARAMS[:default].dup
params.merge!(SECTION_PARAMS[section_type])
# May be able to overcome this hacky kludge mess by providing `processor:` to new
- file = Tempfile.new('foo1', encoding: 'ascii-8bit')
+ file = Tempfile.new('foo1', encoding: 'utf-8')
begin
- file.write(image.to_blob)
+ file.write(image.to_blob.force_encoding('utf-8'))
file.rewind
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
file.close
ensure
file.close
file.unlink # deletes the temp file
end
if @extracted_text == ''
- file = Tempfile.new('foo2')
+ file = Tempfile.new('foo2', encoding: 'utf-8')
begin
- file.write(img.dup.white_threshold(245).to_blob)
+ file.write(img.dup.white_threshold(245).to_blob.force_encoding('utf-8'))
file.rewind
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
file.close
ensure
file.close
file.unlink
end
end
if @extracted_text == ''
- file = Tempfile.new('foo3')
+ file = Tempfile.new('foo3', encoding: 'utf-8')
begin
- file.write(img.dup.quantize(256, Magick::GRAYColorspace).to_blob)
+ file.write(img.dup.quantize(256, Magick::GRAYColorspace).to_blob.force_encoding('utf-8'))
file.rewind
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
file.close
ensure
file.close
@@ -160,5 +160,6 @@
@extracted_text
end
end
+