Sha256: ce7b72958c2c63fcc67fba7d6891d075123e5f9c98ef0254ed7e290c82db9f8d
Contents?: true
Size: 1.19 KB
Versions: 2
Compression:
Stored size: 1.19 KB
Contents
require 'nokogiri'
require 'cgi'

class RemoteTable
  class Format
    # Reads an HTML document row by row, yielding each row as a Hash of
    # header => cell text.
    #
    # Relies on +t+, +utf8+ and +remove_useless_characters!+, none of which are
    # defined in this file (presumably supplied by Format / Textual -- confirm
    # there).
    class HTML < Format
      include Textual

      # Soft hyphens in the two spellings that can still be present after
      # CGI.unescapeHTML: the raw U+00AD character and the named entity
      # "&shy;", which CGI.unescapeHTML does not decode. Written as an escape
      # so the otherwise invisible character is visible in the source.
      SOFT_HYPHEN = /&shy;|\u00AD/

      # Yields one Hash per table row.
      #
      # Rows are selected with t.properties.row_xpath and cells within a row
      # with t.properties.column_xpath. Cell text has runs of whitespace
      # collapsed to a single space and is stripped.
      #
      # When t.properties.headers is an Array it supplies the hash keys;
      # otherwise the first matched row is consumed as the header row and is
      # not yielded. Rows whose values are all blank are skipped unless
      # t.properties.keep_blank_rows is set.
      #
      # The local file is deleted when iteration finishes, including when it
      # ends with an exception.
      def each(&blk)
        remove_useless_characters!
        configured_headers = t.properties.headers
        html_headers = configured_headers.is_a?(::Array) ? configured_headers : nil
        document = ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8')
        document.xpath(t.properties.row_xpath).each do |row|
          values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
          if html_headers.nil?
            # No explicit headers: the first row names the columns.
            html_headers = values
            next
          end
          hash = zip html_headers, values
          yield hash if t.properties.keep_blank_rows || hash.any? { |_key, value| value.present? }
        end
      ensure
        t.local_file.delete
      end

      private

      # Pairs keys with values positionally and returns the resulting Hash.
      # Keys without a matching value map to nil; surplus values are dropped;
      # for a repeated key the last value wins.
      def zip(keys, values)
        ::Hash[keys.zip(values)]
      end

      # Returns the local file's contents, HTML-unescaped, with soft hyphens
      # removed (the original author attributes these to MS Office output).
      #
      # should we be doing this in ruby?
      def unescaped_html_without_soft_hyphens
        str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
        # gsub! returns nil when nothing was replaced, so return str explicitly.
        str.gsub! SOFT_HYPHEN, ''
        str
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
remote_table-1.1.10 | lib/remote_table/format/html.rb |
remote_table-1.1.9 | lib/remote_table/format/html.rb |