lib/remote_table/format/html.rb in remote_table-1.1.7 vs lib/remote_table/format/html.rb in remote_table-1.1.8
- old
+ new
@@ -6,11 +6,11 @@
include Textual
def each(&blk)
remove_useless_characters!
html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
- values = row.xpath(t.properties.column_xpath).map { |td| utf8 td.content.gsub(/\s+/, ' ').strip }
+ values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
if html_headers.nil?
html_headers = values
next
end
hash = zip html_headers, values
@@ -29,10 +29,10 @@
hash
end
# Reads the whole file at t.local_file.path, unescapes HTML entities with
# ::CGI.unescapeHTML, strips every match of the regex literal below
# (a soft hyphen, going by the method name), and returns the resulting String.
#
# The "-" line is the previous revision, which unescaped the raw file contents
# directly; the "+" line is the current revision, which first passes the
# contents through the utf8 helper and then unescapes.
# NOTE(review): utf8 is defined outside this view (presumably via Textual) —
# confirm exactly what conversion it performs.
#
# should we be doing this in ruby?
def unescaped_html_without_soft_hyphens
- str = ::CGI.unescapeHTML ::IO.read(t.local_file.path)
+ str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
# get rid of MS Office baddies
# gsub! edits str in place; its return value (nil when nothing matched) is
# deliberately ignored and str itself is returned on the next line.
str.gsub! /­/, ''
str
end
end