lib/sqed/parser/ocr_parser.rb in sqed-0.5.4 vs lib/sqed/parser/ocr_parser.rb in sqed-0.5.5
- old
+ new
@@ -106,23 +106,24 @@
# img
# end
# @return [String]
# the ocr text
+ # TODO: very kludgy
def get_text(section_type: :default)
- img = image
+ img = image.dup
# resample if an image 4"x4" is less than 300dpi
if img.columns * img.rows < 144000
img = img.resample(300)
end
params = SECTION_PARAMS[:default].dup
params.merge!(SECTION_PARAMS[section_type])
# May be able to overcome this hacky kludge mess by providing `processor:` to new
- file = Tempfile.new('foo1')
+ file = Tempfile.new('foo1', encoding: 'ascii-8bit')
begin
file.write(image.to_blob)
file.rewind
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
file.close
@@ -138,23 +139,23 @@
file.rewind
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
file.close
ensure
file.close
- file.unlink # deletes the temp file
+ file.unlink
end
end
if @extracted_text == ''
file = Tempfile.new('foo3')
begin
- file.write(img.dup.quantize(256,Magick::GRAYColorspace).to_blob)
+ file.write(img.dup.quantize(256, Magick::GRAYColorspace).to_blob)
file.rewind
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
file.close
ensure
file.close
- file.unlink # deletes the temp file
+ file.unlink
end
end
@extracted_text
end