# encoding: UTF-8
#
# Specs for RTesseract: instantiation, OCR output for several image formats,
# processor selection, language/option/config handling, PDF output, cropping,
# blob input, block-based image manipulation, error handling and global
# configuration.
#
# NOTE(review): the text this file was recovered from had most receiver
# expressions truncated (e.g. `expect( eql('43XF')`). Calls such as
# `RTesseract.new(@image_tif)` were restored from the surviving argument
# tails and the surrounding examples; confirm against the upstream spec
# before relying on any individual expectation.
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
require 'pathname'
require 'tempfile'
require 'tmpdir'

RTesseract::Processor::RMagickProcessor.setup

# Helper whose #to_s always raises; passed as an option value to check that
# a failing string conversion yields an empty command fragment.
class MakeStringError
  def to_s
    raise 'error'
  end
end

describe 'Rtesseract' do
  before do
    # Directory containing this spec file; fixture images live in ./images.
    @path = Pathname.new(__FILE__.gsub('rtesseract_spec.rb', '')).expand_path
    @image_tif = @path.join('images', 'test.tif').to_s
    @image_for_pdf = @path.join('images', 'test-pdf.png').to_s
  end

  it ' be instantiable' do
    expect(RTesseract.new.class).to eql(RTesseract)
    expect(RTesseract.new('').class).to eql(RTesseract)
    expect(RTesseract.new(@image_tif).class).to eql(RTesseract)
  end

  it ' translate image to text' do
    expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
    expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
    expect(RTesseract.new(@path.join('images', 'test1.tif').to_s).to_s_without_spaces).to eql('V2V4')
    expect(RTesseract.new(@path.join('images', 'test with spaces.tif').to_s).to_s_without_spaces).to eql('V2V4')
  end

  it ' translate images .png, .jpg, .bmp' do
    expect(RTesseract.new(@path.join('images', 'test.png').to_s).to_s_without_spaces).to eql('HW9W')
    expect(RTesseract.new(@path.join('images', 'test.jpg').to_s).to_s_without_spaces).to eql('3R8F')
    expect(RTesseract.new(@path.join('images', 'test.bmp').to_s).to_s_without_spaces).to eql('FLA6')
  end

  it ' should not error with depth > 32' do
    # expect(RTesseract.new(@path.join('images', 'README.pdf').to_s, debug: true).to_s_without_spaces).to eql('')
  end

  it ' support different processors' do
    # Rmagick
    expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
    expect(RTesseract.new(@image_tif, processor: 'rmagick').to_s_without_spaces).to eql('43XF')
    expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'rmagick').to_s_without_spaces).to eql('HW9W')

    # MiniMagick
    expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
    expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'mini_magick').to_s_without_spaces).to eql('HW9W')

    # NoneMagick
    expect(RTesseract.new(@image_tif, processor: 'none').to_s_without_spaces).to eql('43XF')
  end

  it ' change the image' do
    image = RTesseract.new(@image_tif)
    expect(image.to_s_without_spaces).to eql('43XF')
    image.source = @path.join('images', 'test1.tif').to_s
    expect(image.to_s_without_spaces).to eql('V2V4')
  end

  it ' returns the source' do
    image = RTesseract.new(@image_tif)
    expect(image.source).to eql(@image_tif)
  end

  it ' select the language' do
    # English
    expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
    expect(RTesseract.new(@image_tif, lang: 'en').lang).to eql(' -l eng ')
    expect(RTesseract.new(@image_tif, lang: 'en-US').lang).to eql(' -l eng ')
    expect(RTesseract.new(@image_tif, lang: 'english').lang).to eql(' -l eng ')

    # Portuguese
    expect(RTesseract.new(@image_tif, lang: 'por').lang).to eql(' -l por ')
    expect(RTesseract.new(@image_tif, lang: 'pt-BR').lang).to eql(' -l por ')
    expect(RTesseract.new(@image_tif, lang: 'pt-br').lang).to eql(' -l por ')
    expect(RTesseract.new(@image_tif, lang: 'pt').lang).to eql(' -l por ')
    expect(RTesseract.new(@image_tif, lang: 'portuguese').lang).to eql(' -l por ')

    expect(RTesseract.new(@image_tif, lang: 'eng').to_s_without_spaces).to eql('43XF')

    expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
    expect(RTesseract.new(@image_tif, lang: 'it').lang).to eql(' -l ita ')

    # Invalid lang object
    expect(RTesseract.new(@image_tif, lang: MakeStringError.new).lang).to eql('')
  end

  it ' select options' do
    expect(RTesseract.new(@image_tif).options_cmd).to eql([])
    expect(RTesseract.new(@image_tif, options: 'digits').options_cmd).to eql(['digits'])
    expect(RTesseract.new(@image_tif, options: :digits).options_cmd).to eql([:digits])
    expect(RTesseract.new(@image_tif, options: [:digits, :quiet]).options_cmd).to eql([:digits, :quiet])
  end

  it ' support pdf output mode' do
    # Internal test. Consider 'pdf' option only when #to_pdf is called.
    expect(RTesseract.new(@image_tif, options: 'pdf').options_cmd).to eql([])
    expect(RTesseract.new(@image_tif, options: :pdf).options_cmd).to eql([])

    # NOTE(review): source image restored as @image_for_pdf - confirm.
    pdf_ocr = RTesseract.new(@image_for_pdf)
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    expect(File.exist?(pdf_ocr.to_pdf)).to eql(true)
    expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf')
    # Comment next line and go to tmp dir to see generated pdf.
    expect(pdf_ocr.clean).to eq(true)
    expect(File.exist?(pdf_ocr.to_pdf)).to eql(false)

    # Still have original functionality (i.e. #to_s, #to_s_without_spaces).
    pdf_ocr = RTesseract.new(@image_tif)
    expect(File.exist?(pdf_ocr.to_pdf)).to eql(true)
    expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf')
    expect(pdf_ocr.to_s_without_spaces).to eql('43XF')
    expect(pdf_ocr.clean).to eq(true)
    expect(File.exist?(pdf_ocr.to_pdf)).to eql(false)
  end

  it ' warn when tesseract cannot give pdf' do
    # NOTE(review): source image restored as @image_for_pdf - confirm.
    rtesseract = RTesseract.new(@image_for_pdf)

    allow(rtesseract).to receive(:tesseract_version).and_return(3.02)
    expect { rtesseract.to_pdf }.to raise_error(RTesseract::TesseractVersionError)

    allow(rtesseract).to receive(:tesseract_version).and_return(3.03)
    expect { rtesseract.to_pdf }.not_to raise_error
  end

  it ' be configurable' do
    expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0, display_text: 0).config).to eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
    expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0')
    expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0")
    expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF')
    expect(RTesseract.new(@image_tif, tessedit_char_whitelist: 'ABCDEF12345').to_s_without_spaces).to eql('43F')
  end

  it ' crop image' do
    expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 140, y: 10).to_s_without_spaces).to eql('4')
    expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 180, y: 10).to_s_without_spaces).to eql('3')
    expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 20, h: 40, x: 216, y: 10).to_s_without_spaces).to eql('X')
    expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 30, h: 40, x: 240, y: 10).to_s_without_spaces).to eql('F')
  end

  it ' read image from blob' do
    image = Magick::Image.read(@path.join('images', 'test.png').to_s).first
    blob = image.quantize(256, Magick::GRAYColorspace).to_blob

    test = RTesseract.new('', psm: 7)
    test.from_blob(blob)
    expect(test.to_s_without_spaces).to eql('HW9W')

    test = RTesseract.new('', psm: 7)
    expect { test.from_blob('') }.to raise_error(RTesseract::ConversionError)
  end

  it ' use a instance' do
    # NOTE(review): the whole receiver was lost here; restored as an
    # RMagick image instance passed directly to RTesseract.new - confirm.
    expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql('43XF')

    expect(RTesseract::Processor::RMagickProcessor.a_name?('teste')).to eql(false)
    expect(RTesseract::Processor::RMagickProcessor.a_name?('rmagick')).to eql(true)
    expect(RTesseract::Processor::RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)

    expect(RTesseract::Processor::MiniMagickProcessor.a_name?('teste')).to eql(false)
    expect(RTesseract::Processor::MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
    expect(RTesseract::Processor::MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)

    expect(RTesseract::Processor::NoneProcessor.a_name?('none')).to eql(true)
    expect(RTesseract::Processor::NoneProcessor.a_name?('NoneProcessor')).to eql(true)
  end

  it ' change image in a block' do
    test = RTesseract.read(@path.join('images', 'test.png').to_s) {}
    expect(test.class).to eql(RTesseract)

    # The block's value is the transformed image; the former reassignment of
    # the block parameter was dead code and has been dropped.
    test = RTesseract.read(@image_tif) do |image|
      image.quantize(256, Magick::GRAYColorspace)
    end
    expect(test.to_s_without_spaces).to eql('43XF')

    test = RTesseract.read(@path.join('images', 'blank.tif').to_s) do |image|
      image
    end
    expect(test.to_s_without_spaces).to eql('')

    test = RTesseract.read(@path.join('images', 'test.png').to_s) do |image|
      image.rotate(90)
    end
    expect(test.to_s_without_spaces).to eql('HW9W')

    test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en') do |image|
      image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
    end
    expect(test.to_s_without_spaces).to eql('3R8F')

    test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en', processor: 'mini_magick') do |image|
      image.gravity 'south'
    end
    expect(test.to_s_without_spaces).to eql('3R8F')
  end

  it 'does not raise on read with image_magick processor' do
    expect {
      instance = RTesseract.read(@image_tif, processor: 'mini_magick') {}
      expect(instance.processor.a_name?('mini_magick')).to be_truthy
    }.not_to raise_error
  end

  it ' get a error' do
    expect { RTesseract.new(@path.join('images', 'test.jpg').to_s, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
    expect { RTesseract.new(@path.join('images', 'test_not_exists.png').to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)

    # Invalid psm object
    expect(RTesseract.new(@image_tif, psm: MakeStringError.new).psm).to eql('')
  end

  it 'remove a file' do
    # NOTE(review): both arguments were truncated to their trailing string
    # literal; restored as a Tempfile and a path under Dir.tmpdir - confirm.
    RTesseract::Utils.remove_files(Tempfile.new('config'))

    expect { RTesseract::Utils.remove_files(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError)
  end

  it ' get a numeric value for tesseract version' do
    expect(RTesseract::Utils.version_number).to be_a Float
  end

  it ' support default config processors' do
    # Rmagick
    RTesseract.configure { |config| config.processor = 'rmagick' }
    expect(RTesseract.new(@image_tif).processor.a_name?('rmagick')).to eql(true)

    # MiniMagick
    RTesseract.configure { |config| config.processor = 'mini_magick' }
    expect(RTesseract.new(@image_tif).processor.a_name?('mini_magick')).to eql(true)

    # NoneMagick
    RTesseract.configure { |config| config.processor = 'none' }
    expect(RTesseract.new(@image_tif).processor.a_name?('none')).to eql(true)

    # overwrite default
    RTesseract.configure { |config| config.processor = 'rmagick' }
    expect(RTesseract.new(@image_tif, processor: 'mini_magick').processor.a_name?('mini_magick')).to eql(true)

    RTesseract.configure { |config| config.lang = 'portuguese' }
    expect(RTesseract.new(@image_tif).lang).to eql(' -l por ')

    RTesseract.configure { |config| config.psm = 7 }
    expect(RTesseract.new(@image_tif).psm).to eql(' -psm 7 ')

    RTesseract.configure { |config| config.tessdata_dir = '/tmp/test' }
    expect(RTesseract.new(@image_tif).tessdata_dir).to eql(' --tessdata-dir /tmp/test ')

    RTesseract.configure { |config| config.user_words = '/tmp/test' }
    expect(RTesseract.new(@image_tif).user_words).to eql(' --user-words /tmp/test ')

    RTesseract.configure { |config| config.user_patterns = '/tmp/test' }
    expect(RTesseract.new(@image_tif).user_patterns).to eql(' --user-patterns /tmp/test ')
  end

  it ' configure pdf has no effect and kept in-house' do
    # So it does not interfere with #to_s outputting.
    RTesseract.configure { |config| config.options_cmd = ['pdf'] }
    expect(RTesseract.new(@image_tif).options_cmd).to eql([])

    RTesseract.configure { |config| config.options_cmd = [:pdf] }
    expect(RTesseract.new(@image_tif).options_cmd).to eql([])

    RTesseract.configure { |config| config.options_cmd = [:pdf, 'pdf'] }
    expect(RTesseract.new(@image_tif).options_cmd).to eql([])
  end

  it ' support new configs' do
    expect(RTesseract.new(@image_tif, tessdata_dir: '/tmp/test').tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
    expect(RTesseract.new(@image_tif, user_words: '/tmp/test').user_words).to eql(' --user-words /tmp/test ')
    expect(RTesseract.new(@image_tif, user_patterns: '/tmp/test').user_patterns).to eql(' --user-patterns /tmp/test ')

    # Option values whose #to_s raises must produce an empty fragment.
    expect(RTesseract.new(@image_tif, tessdata_dir: MakeStringError.new).tessdata_dir).to eql('')
    expect(RTesseract.new(@image_tif, user_words: MakeStringError.new).user_words).to eql('')
    expect(RTesseract.new(@image_tif, user_patterns: MakeStringError.new).user_patterns).to eql('')

    # expect(RTesseract.new(@path.join('images', 'test_words.png').to_s, psm: 3, user_words: @path.join('configs', 'eng.user-words.txt').to_s).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
  end
end