Sha256: 50a2a7963e7039733707a4c9c5189f648733e8d2dd48665053b9d12b5f3fd8a6
Contents?: true
Size: 708 Bytes
Versions: 2
Compression:
Stored size: 708 Bytes
Contents
require 'nokogiri'

class RTesseract
  # Extracts per-word text and bounding boxes from Tesseract's hOCR output.
  module Box
    extend RTesseract::Base

    # Runs Tesseract on +source+ with hOCR output enabled and parses the
    # generated '.hocr' file.
    #
    # Note: this sets +tessedit_create_hocr+ on the +options+ object that the
    # caller passed in.
    #
    # @param source  the input handed to RTesseract::Command
    # @param options an object responding to +tessedit_create_hocr=+
    # @return [Array<Hash>] one hash per recognised word (see .parse)
    def self.run(source, options)
      options.tessedit_create_hocr = 1

      # temp_file is not defined in this module; presumably it is provided by
      # RTesseract::Base (confirm there).
      RTesseract::Command.new(source, temp_file, options).run

      parse(File.read(temp_file('.hocr')))
    end

    # Parses hOCR markup into a list of words with their bounding boxes.
    #
    # Only <span> elements with class +ocrx_word+ or +ocr_word+ are considered.
    #
    # @param content [String] hOCR (HTML) markup
    # @return [Array<Hash>] hashes with the keys :word, :x_start, :y_start,
    #   :x_end and :y_end; coordinates are Integers and are 0 when the span
    #   carries no usable +bbox+ property
    def self.parse(content)
      Nokogiri::HTML(content).css('span.ocrx_word, span.ocr_word').map do |word|
        # A local (rather than a module-level instance variable) keeps parse
        # free of shared state between calls.
        x_start, y_start, x_end, y_end = bbox_coordinates(word['title'])

        { word: word.text, x_start: x_start, y_start: y_start, x_end: x_end, y_end: y_end }
      end
    end

    # Pulls the four bbox numbers out of an hOCR +title+ attribute such as
    # "bbox 10 20 30 40; x_wconf 90".
    #
    # The +bbox+ property is looked up by name, so it does not have to be the
    # first property in the title. A nil title or a title without +bbox+
    # yields [0, 0, 0, 0].
    #
    # @param title [String, nil] raw value of the title attribute
    # @return [Array<Integer>] [x_start, y_start, x_end, y_end]
    def self.bbox_coordinates(title)
      properties = title.to_s.split(';').map(&:split)
      bbox = properties.find { |property| property.first == 'bbox' } || []

      # nil.to_i is 0, so missing values fall back to 0.
      Array.new(4) { |index| bbox[index + 1].to_i }
    end
    private_class_method :bbox_coordinates
  end
end
Version data entries
2 entries across 2 versions & 1 rubygem
Version | Path |
---|---|
rtesseract-3.0.2 | lib/rtesseract/box.rb |
rtesseract-3.0.1 | lib/rtesseract/box.rb |