Sha256: 46e3ddbad7ef7722a6d6849cfa1c8f4f66428820fa9b63201db21465313b3f5d

Contents?: true

Size: 967 Bytes

Versions: 10

Compression:

Stored size: 967 Bytes

Contents

class RemoteTable
  # Row iteration for HTML tables. This module relies on the including object
  # to provide +path+, +headers+, +row_xpath+, +column_xpath+,
  # +keep_blank_rows+, +backup_file!+, +convert_file_to_utf8!+ and
  # +restore_file!+ (none of them are defined here).
  module Html
    # Parses the file at +path+ as HTML and yields one Hash per table row,
    # mapping header => cell text.
    #
    # Rows are selected with +row_xpath+ and cells within a row with
    # +column_xpath+. Runs of whitespace inside a cell are collapsed to a
    # single space and the result is stripped.
    #
    # When +headers+ is an Array it is used as the header list; otherwise the
    # first matched row is consumed as the header row and is not yielded.
    #
    # A row is yielded when +keep_blank_rows+ is truthy or when at least one
    # of its values is +present?+.
    #
    # +restore_file!+ is always called on the way out, even if parsing raises.
    def each_row(&block)
      backup_file!
      convert_file_to_utf8!
      html_headers = headers.is_a?(Array) ? headers : nil
      document = Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8')
      document.xpath(row_xpath).each do |row|
        values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
        if html_headers.nil?
          # No explicit headers were supplied: the first row names the columns.
          html_headers = values
          next
        end
        hash = zip(html_headers, values)
        yield hash if keep_blank_rows || hash.any? { |_key, value| value.present? }
      end
    ensure
      restore_file!
    end

    private

    # http://snippets.dzone.com/posts/show/406
    #
    # Pairs +keys+ with +values+ positionally and returns the resulting Hash.
    # Missing values become nil, surplus values are dropped, and a repeated
    # key keeps the value paired with its last occurrence.
    def zip(keys, values)
      Hash[keys.zip(values)]
    end

    # should we be doing this in ruby?
    #
    # Reads the file at +path+, unescapes HTML entities with
    # CGI.unescapeHTML, and removes soft hyphens: both the U+00AD character
    # and the named entity "&shy;" (which CGI.unescapeHTML leaves untouched).
    #
    # File.read is used rather than IO.read so that a +path+ beginning with
    # "|" is never interpreted as a command to execute.
    def unescaped_html_without_soft_hyphens
      str = CGI.unescapeHTML(File.read(path))
      str.gsub(/\u00AD|&shy;/, '')
    end
  end
end

Version data entries

10 entries across 10 versions & 1 rubygems

Version Path
remote_table-0.2.21 lib/remote_table/file/html.rb
remote_table-0.2.20 lib/remote_table/file/html.rb
remote_table-0.2.19 lib/remote_table/file/html.rb
remote_table-0.2.18 lib/remote_table/file/html.rb
remote_table-0.2.17 lib/remote_table/file/html.rb
remote_table-0.2.16 lib/remote_table/file/html.rb
remote_table-0.2.15 lib/remote_table/file/html.rb
remote_table-0.2.14 lib/remote_table/file/html.rb
remote_table-0.2.13 lib/remote_table/file/html.rb
remote_table-0.2.12 lib/remote_table/file/html.rb