Sha256: ad5baaf673c0208ccd6b9c33467e58bc22afb5f656fc76792235d69926284781
Contents?: true
Size: 1.33 KB
Versions: 1
Compression:
Stored size: 1.33 KB
Contents
# encoding: UTF-8

# RTesseract
class RTesseract
  # Alternative approach to Mixed when you want to read from specific areas.
  # Requires `-psm 4` which means the text must be "a single column of text of variable sizes".
  #
  # The areas are written to a `.uzn` zone file placed next to the image
  # (same folder, same basename) before the tesseract command is run.
  class Uzn < RTesseract
    # List of area hashes; each is read with the keys :x, :y, :w and :h.
    attr_reader :areas

    # Label written at the end of every zone line when no :alphabet is given.
    DEFAULT_ALPHABET = 'Text/Latin'.freeze

    # src     - image source, forwarded untouched to the parent initializer.
    # options - options hash. :areas (Array of area hashes) and :alphabet
    #           (String) are consumed here; everything else is forwarded to
    #           the parent with psm forced to 4.
    #
    # Yields the new instance when a block is given, so areas can be added
    # with #area.
    def initialize(src = '', options = {})
      # Work on a copy so the caller's hash does not lose its keys.
      options = options.dup
      @areas = options.delete(:areas) || []
      @alphabet = options.delete(:alphabet) || DEFAULT_ALPHABET
      super(src, options.merge(psm: 4))
      yield self if block_given?
    end

    # Add areas
    #
    # points - Hash with the keys :x, :y, :w and :h.
    #
    # Returns the updated list of areas.
    def area(points)
      areas << points
    end

    # Writes the zone file for the current image and runs the tesseract
    # command through the shell. Returns whatever the command printed.
    #
    # NOTE(review): image, configuration, file_dest, lang, psm, tessdata_dir,
    # user_words, user_patterns, config_file, clear_console_output and
    # options_cmd are not defined in this file - presumably inherited from
    # RTesseract.
    def convert_command
      @image = image
      write_uzn_file
      `#{configuration.command} "#{@image}" "#{file_dest}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{options_cmd.join(' ')}`
    end

    # Removes the zone file created by #convert_command.
    # NOTE(review): presumably called by the parent class once the conversion
    # is done - confirm against RTesseract.
    def after_convert_hook
      # compact: nothing to remove when no zone file was ever written.
      RTesseract::Utils.remove_files([@uzn_file].compact)
    end

    private

    # Writes one "x y w h alphabet" line per area into "<image basename>.uzn"
    # in the image's folder, truncating any previous file of that name.
    #
    # @uzn_file keeps referencing the File object (now closed) so that
    # #after_convert_hook can hand it to RTesseract::Utils.remove_files.
    def write_uzn_file
      folder = File.dirname(@image)
      basename = File.basename(@image, '.tif')
      @uzn_file = File.new("#{folder}/#{basename}.uzn", File::CREAT | File::TRUNC | File::RDWR)
      areas.each do |points|
        @uzn_file.write("#{points[:x]} #{points[:y]} #{points[:w]} #{points[:h]} #{@alphabet}\n")
      end
    ensure
      # Closing flushes the content to disk and releases the descriptor
      # before the external command reads the file and before it is unlinked.
      @uzn_file.close if @uzn_file && !@uzn_file.closed?
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
rtesseract-2.2.0 | lib/rtesseract/uzn.rb |