spec/rtesseract_spec.rb in rtesseract-2.0.1 vs spec/rtesseract_spec.rb in rtesseract-2.1.0

- old
+ new

@@ -11,10 +11,11 @@ describe 'Rtesseract' do before do @path = Pathname.new(__FILE__.gsub('rtesseract_spec.rb', '')).expand_path @image_tif = @path.join('images', 'test.tif').to_s + @image_for_pdf = @path.join('images', 'test-pdf.png').to_s end it ' be instantiable' do expect(RTesseract.new.class).to eql(RTesseract) expect(RTesseract.new('').class).to eql(RTesseract) @@ -92,16 +93,47 @@ expect(RTesseract.new(@image_tif, options: 'digits').options_cmd).to eql(['digits']) expect(RTesseract.new(@image_tif, options: :digits).options_cmd).to eql([:digits]) expect(RTesseract.new(@image_tif, options: [:digits, :quiet]).options_cmd).to eql([:digits, :quiet]) end + it ' support pdf output mode' do + # Internal test. Consider 'pdf' option only when #to_pdf is called. + expect(RTesseract.new(@image_tif, options: 'pdf').options_cmd).to eql([]) + expect(RTesseract.new(@image_for_pdf, options: :pdf).options_cmd).to eql([]) + + pdf_ocr = RTesseract.new(@image_for_pdf) + expect(File.exists?(pdf_ocr.to_pdf)).to eql(true) + expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf') + # Comment next line and go to tmp dir to see generated pdf. + expect(pdf_ocr.clean).to eq(true) + expect(File.exists?(pdf_ocr.to_pdf)).to eql(false) + + # Still have original functionality (i.e. #to_s, #to_s_without_spaces). + pdf_ocr = RTesseract.new(@image_tif) + expect(File.exists?(pdf_ocr.to_pdf)).to eql(true) + expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf') + expect(pdf_ocr.to_s_without_spaces).to eql('43XF') + expect(pdf_ocr.clean).to eq(true) + expect(File.exists?(pdf_ocr.to_pdf)).to eql(false) + end + + it ' warn when tesseract cannot give pdf' do + rtesseract = RTesseract.new(@image_for_pdf) + + allow(rtesseract).to receive(:tesseract_version).and_return(3.02) + expect { rtesseract.to_pdf }.to raise_error(RTesseract::TesseractVersionError) + + allow(rtesseract).to receive(:tesseract_version).and_return(3.03) + expect { rtesseract.to_pdf }.not_to raise_error + end + it ' be configurable' do expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0, display_text: 0).config).to eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0") expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0') expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0") expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF') - expect(RTesseract.new(@image_tif, tessedit_char_whitelist: "ABCDEF12345").to_s_without_spaces).to eql('43F') + expect(RTesseract.new(@image_tif, tessedit_char_whitelist: 'ABCDEF12345').to_s_without_spaces).to eql('43F') end it ' crop image' do expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 140, y: 10).to_s_without_spaces).to eql('4') expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 180, y: 10).to_s_without_spaces).to eql('3') @@ -177,11 +209,15 @@ RTesseract::Utils.remove_files(Tempfile.new('config')) expect { RTesseract::Utils.remove_files(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError) end - it ' support default config processors' do + it ' get a numeric value for tesseract version' do + expect(RTesseract::Utils.version_number).to be_a Float + end + + it ' support default config processors' do # Rmagick RTesseract.configure { |config| config.processor = 'rmagick' } expect(RTesseract.new(@image_tif).processor.a_name?('rmagick')).to eql(true) # MiniMagick @@ -208,9 +244,21 @@ RTesseract.configure { |config| config.user_words = '/tmp/test' } expect(RTesseract.new(@image_tif).user_words).to eql(' --user-words /tmp/test ') RTesseract.configure { |config| config.user_patterns = '/tmp/test' } expect(RTesseract.new(@image_tif).user_patterns).to eql(' --user-patterns /tmp/test ') + end + + it ' configure pdf has no effect and kept in-house' do + # So it does not interfere with #to_s outputting. + RTesseract.configure { |config| config.options_cmd = ['pdf'] } + expect(RTesseract.new(@image_tif).options_cmd).to eql([]) + + RTesseract.configure { |config| config.options_cmd = [:pdf] } + expect(RTesseract.new(@image_tif).options_cmd).to eql([]) + + RTesseract.configure { |config| config.options_cmd = [:pdf, 'pdf'] } + expect(RTesseract.new(@image_tif).options_cmd).to eql([]) end it ' support new configs' do expect(RTesseract.new(@image_tif, tessdata_dir: '/tmp/test').tessdata_dir).to eql(' --tessdata-dir /tmp/test ') expect(RTesseract.new(@image_tif, user_words: '/tmp/test').user_words).to eql(' --user-words /tmp/test ')