lib/remote_table/file/html.rb in remote_table-0.2.22 vs lib/remote_table/file/html.rb in remote_table-0.2.23
- old
+ new
@@ -1,10 +1,11 @@
class RemoteTable
module Html
def each_row(&block)
backup_file!
convert_file_to_utf8!
+ remove_useless_characters!
html_headers = (headers.is_a?(Array)) ? headers : nil
Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(row_xpath).each do |row|
values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
if html_headers.nil?
html_headers = values
@@ -27,10 +28,10 @@
end
# should we be doing this in ruby?
# Reads the file at +path+, decodes HTML entities with CGI.unescapeHTML, and
# removes every match of the pattern below (soft hyphens, per the method
# name). The result is the markup that each_row hands to Nokogiri::HTML.
#
# NOTE(review): the pattern appears to hold a literal soft-hyphen character
# (U+00AD), which is invisible in most viewers; it may originally have been
# an HTML entity that the diff viewer rendered -- confirm against the gem.
# The removed (-) line also matched "\302\255", the UTF-8 byte pair for
# U+00AD; presumably that case is now handled elsewhere (each_row gains a
# remove_useless_characters! call in this same diff) -- TODO confirm.
#
# @return [String] the unescaped file contents with pattern matches removed
def unescaped_html_without_soft_hyphens
str = CGI.unescapeHTML IO.read(path)
- str.gsub! /­|\302\255/, ''
+ str.gsub! /­/, ''
# gsub! returns nil when nothing was replaced, so return str explicitly.
str
end
end
end