lib/remote_table/format/html.rb in remote_table-1.1.10 vs lib/remote_table/format/html.rb in remote_table-1.2.0
- old
+ new
@@ -2,40 +2,13 @@
require 'cgi'
class RemoteTable
class Format
class HTML < Format
include Textual
- def each(&blk)
- remove_useless_characters!
- html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
- ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
- values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
- if html_headers.nil?
- html_headers = values
- next
- end
- hash = zip html_headers, values
- yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
- end
- ensure
- t.local_file.delete
- end
-
- private
-
- # http://snippets.dzone.com/posts/show/406
- def zip(keys, values)
- hash = ::Hash.new
- keys.zip(values) { |k,v| hash[k]=v }
- hash
- end
-
- # should we be doing this in ruby?
- def unescaped_html_without_soft_hyphens
- str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
- # get rid of MS Office baddies
- str.gsub! '&shy;', ''
- str
+ include ProcessedByNokogiri
+
+ def nokogiri_class
+ ::Nokogiri::HTML::Document
end
end
end
end