Sha256: 83701bc6877ed932a10aa46ff185902413cf1a0b8b47b0ac8b16cb2d0acaaf31

Contents?: true

Size: 1000 Bytes

Versions: 10

Compression:

Stored size: 1000 Bytes

Contents

class RemoteTable
  # Row iteration for HTML sources.
  #
  # The including object is expected to provide +path+, +headers+,
  # +row_xpath+, +column_xpath+, +keep_blank_rows+ and the file helpers
  # (+backup_file!+, +convert_file_to_utf8!+, +remove_useless_characters!+,
  # +restore_file!+); none of them are defined in this module.
  module Html
    # Parses the file at +path+ and yields one Hash per table row, mapping
    # header text to cell text.
    #
    # When +headers+ is an Array it supplies the keys; otherwise the first
    # row matched by +row_xpath+ is consumed as the header row and is not
    # yielded. Rows whose values are all blank are skipped unless
    # +keep_blank_rows+ is truthy.
    #
    # +restore_file!+ always runs afterwards, even when parsing raises.
    def each_row(&block)
      backup_file!
      convert_file_to_utf8!
      remove_useless_characters!
      html_headers = headers.is_a?(Array) ? headers : nil
      document = Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8')
      document.xpath(row_xpath).each do |row|
        # Collapse every whitespace run inside a cell to a single space.
        values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
        if html_headers.nil?
          # No explicit headers were given: the first row names the columns.
          html_headers = values
          next
        end
        hash = zip(html_headers, values)
        yield hash if keep_blank_rows || hash.values.any? { |value| value.present? }
      end
    ensure
      restore_file!
    end

    private

    # Pairs each key with the value at the same index. Keys without a
    # matching value map to nil; surplus values are dropped.
    #
    # http://snippets.dzone.com/posts/show/406
    def zip(keys, values)
      Hash[keys.zip(values)]
    end

    # Reads the file at +path+, decodes HTML entities and removes soft
    # hyphens (U+00AD).
    #
    # The soft hyphen is written as an escape because the raw character is
    # invisible in most editors. File.read is used instead of IO.read so a
    # path beginning with "|" is never run as a command, and the content is
    # read as UTF-8 to match the encoding each_row passes to Nokogiri.
    #
    # should we be doing this in ruby?
    def unescaped_html_without_soft_hyphens
      raw_html = File.read(path, :encoding => 'UTF-8')
      CGI.unescapeHTML(raw_html).delete("\u00AD")
    end
  end
end

Version data entries

10 entries across 10 versions & 2 rubygems

Version Path
remote_table-0.2.32 lib/remote_table/file/html.rb
remote_table-0.2.31 lib/remote_table/file/html.rb
remote_table-ruby19-0.2.30 lib/remote_table/file/html.rb
remote_table-0.2.30 lib/remote_table/file/html.rb
remote_table-0.2.29 lib/remote_table/file/html.rb
remote_table-0.2.28 lib/remote_table/file/html.rb
remote_table-0.2.27 lib/remote_table/file/html.rb
remote_table-0.2.26 lib/remote_table/file/html.rb
remote_table-0.2.24 lib/remote_table/file/html.rb
remote_table-0.2.23 lib/remote_table/file/html.rb