class RemoteTable
  # Mixin that reads rows out of an HTML table using Nokogiri.
  #
  # The including object is expected to provide `path`, `headers`,
  # `row_xpath`, `column_xpath`, `keep_blank_rows`, `backup_file!`,
  # `convert_file_to_utf8!` and `restore_file!`; none of them are defined in
  # this module.
  module Html
    # Yields one Hash per table row, mapping header => cell text.
    #
    # When `headers` is an Array it is used as the header list and every
    # matched row is treated as data. Otherwise the first matched row is
    # consumed as the header row and is not yielded.
    #
    # Cell text has runs of whitespace collapsed to a single space and is
    # stripped. Rows whose cells are all blank are skipped unless
    # `keep_blank_rows` is truthy.
    #
    # The explicit +block+ parameter is kept for interface compatibility;
    # rows are delivered through `yield`.
    #
    # `restore_file!` always runs afterwards, even if parsing or the
    # caller's block raises.
    def each_row(&block)
      backup_file!
      convert_file_to_utf8!
      html_headers = headers.is_a?(Array) ? headers : nil
      document = Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8')
      document.xpath(row_xpath).each do |row|
        values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
        if html_headers.nil?
          # No caller-supplied headers: the first row names the columns.
          html_headers = values
          next
        end
        hash = zip html_headers, values
        yield hash if keep_blank_rows || hash.any? { |_, v| v.present? }
      end
    ensure
      restore_file!
    end

    private

    # Pairs each key with the value at the same position.
    # Missing values become nil; surplus values are dropped.
    # Originally adapted from http://snippets.dzone.com/posts/show/406
    def zip(keys, values)
      keys.zip(values).to_h
    end

    # Returns the file at `path` with HTML entities unescaped and soft
    # hyphens (U+00AD) removed.
    #
    # The file is read as UTF-8 explicitly, matching the encoding passed to
    # the parser in each_row, rather than relying on the locale default.
    #
    # NOTE(review): entities are unescaped across the whole document before
    # it is parsed, so escaped text such as "&lt;" reaches the parser as "<".
    # Should we be doing this in Ruby at all? Confirm before relying on it
    # for documents that contain escaped markup.
    def unescaped_html_without_soft_hyphens
      html = CGI.unescapeHTML File.read(path, encoding: 'UTF-8')
      # Written as an escape so the pattern is visible in any editor.
      html.gsub("\u00AD", '')
    end
  end
end