Sha256: 7d8d49cd852f00a3b20e90a7bfa8a00b9348d7906468cae5c3cd1e1684fd6626
Contents?: true
Size: 995 Bytes
Versions: 1
Compression:
Stored size: 995 Bytes
Contents
# frozen_string_literal: true class RTesseract module Box extend RTesseract::Base class << self def run(source, errors, options) options.tessedit_create_hocr = 1 RTesseract::Command.new(source, temp_file, errors, options).run parse(File.read(temp_file('.hocr'))) end def parse(content) content.lines.map { |line| parse_line(line) }.compact end def parse_line(line) return unless line.match?(/oc(rx|r)_word/) word = line.match(/(?<=>)(.*?)(?=<)/).to_s return if word.strip == '' word_info(word, parse_position(line)) end def word_info(word, positions) { word: word, x_start: positions[1].to_i, y_start: positions[2].to_i, x_end: positions[3].to_i, y_end: positions[4].to_i } end def parse_position(line) line.match(/(?<=title)(.*?)(?=;)/).to_s.split(' ') end end end end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
rtesseract-3.1.0 | lib/rtesseract/box.rb |