spec/rtesseract_spec.rb in rtesseract-2.0.1 vs spec/rtesseract_spec.rb in rtesseract-2.1.0
- old
+ new
@@ -11,10 +11,11 @@
describe 'Rtesseract' do
before do # builds the fixture paths used by the examples, relative to this spec file's directory
@path = Pathname.new(__FILE__.gsub('rtesseract_spec.rb', '')).expand_path
@image_tif = @path.join('images', 'test.tif').to_s
+ @image_for_pdf = @path.join('images', 'test-pdf.png').to_s # PNG fixture used by the pdf examples
end
it ' be instantiable' do
expect(RTesseract.new.class).to eql(RTesseract)
expect(RTesseract.new('').class).to eql(RTesseract)
@@ -92,16 +93,47 @@
expect(RTesseract.new(@image_tif, options: 'digits').options_cmd).to eql(['digits'])
expect(RTesseract.new(@image_tif, options: :digits).options_cmd).to eql([:digits])
expect(RTesseract.new(@image_tif, options: [:digits, :quiet]).options_cmd).to eql([:digits, :quiet])
end
+ it ' support pdf output mode' do
+ # Internal test. Consider 'pdf' option only when #to_pdf is called.
+ expect(RTesseract.new(@image_tif, options: 'pdf').options_cmd).to eql([])
+ expect(RTesseract.new(@image_for_pdf, options: :pdf).options_cmd).to eql([])
+
+ pdf_ocr = RTesseract.new(@image_for_pdf)
+ expect(File.exists?(pdf_ocr.to_pdf)).to eql(true)
+ expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf')
+ # Comment next line and go to tmp dir to see generated pdf.
+ expect(pdf_ocr.clean).to eq(true)
+ expect(File.exists?(pdf_ocr.to_pdf)).to eql(false)
+
+ # Still have original functionality (i.e. #to_s, #to_s_without_spaces).
+ pdf_ocr = RTesseract.new(@image_tif)
+ expect(File.exists?(pdf_ocr.to_pdf)).to eql(true)
+ expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf')
+ expect(pdf_ocr.to_s_without_spaces).to eql('43XF')
+ expect(pdf_ocr.clean).to eq(true)
+ expect(File.exists?(pdf_ocr.to_pdf)).to eql(false)
+ end
+
+ it ' warn when tesseract cannot give pdf' do
+ rtesseract = RTesseract.new(@image_for_pdf)
+
+ allow(rtesseract).to receive(:tesseract_version).and_return(3.02)
+ expect { rtesseract.to_pdf }.to raise_error(RTesseract::TesseractVersionError)
+
+ allow(rtesseract).to receive(:tesseract_version).and_return(3.03)
+ expect { rtesseract.to_pdf }.not_to raise_error
+ end
+
it ' be configurable' do # options given to .new are expected back from #config as "name value" lines
expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0, display_text: 0).config).to eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0')
expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0")
expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF')
- expect(RTesseract.new(@image_tif, tessedit_char_whitelist: "ABCDEF12345").to_s_without_spaces).to eql('43F')
+ expect(RTesseract.new(@image_tif, tessedit_char_whitelist: 'ABCDEF12345').to_s_without_spaces).to eql('43F') # expected text lacks the 'X', which is absent from the whitelist
end
it ' crop image' do
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 140, y: 10).to_s_without_spaces).to eql('4')
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 180, y: 10).to_s_without_spaces).to eql('3')
@@ -177,11 +209,15 @@
RTesseract::Utils.remove_files(Tempfile.new('config'))
expect { RTesseract::Utils.remove_files(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError)
end
- it ' support default config processors' do
+ it ' get a numeric value for tesseract version' do
+ expect(RTesseract::Utils.version_number).to be_a Float
+ end
+
+ it ' support default config processors' do
# Rmagick
RTesseract.configure { |config| config.processor = 'rmagick' }
expect(RTesseract.new(@image_tif).processor.a_name?('rmagick')).to eql(true)
# MiniMagick
@@ -208,9 +244,21 @@
RTesseract.configure { |config| config.user_words = '/tmp/test' }
expect(RTesseract.new(@image_tif).user_words).to eql(' --user-words /tmp/test ')
RTesseract.configure { |config| config.user_patterns = '/tmp/test' }
expect(RTesseract.new(@image_tif).user_patterns).to eql(' --user-patterns /tmp/test ')
+ end
+
+ it ' configure pdf has no effect and kept in-house' do
+ # So it does not interfere with #to_s outputting.
+ RTesseract.configure { |config| config.options_cmd = ['pdf'] }
+ expect(RTesseract.new(@image_tif).options_cmd).to eql([])
+
+ RTesseract.configure { |config| config.options_cmd = [:pdf] }
+ expect(RTesseract.new(@image_tif).options_cmd).to eql([])
+
+ RTesseract.configure { |config| config.options_cmd = [:pdf, 'pdf'] }
+ expect(RTesseract.new(@image_tif).options_cmd).to eql([])
end
it ' support new configs' do
expect(RTesseract.new(@image_tif, tessdata_dir: '/tmp/test').tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
expect(RTesseract.new(@image_tif, user_words: '/tmp/test').user_words).to eql(' --user-words /tmp/test ')