Sha256: 1ab0a0e0a37cf48b6b0fd6aa8e2668543e5adcbfe05b5edaafaf02f8046faa1d

Contents?: true

Size: 1.89 KB

Versions: 1

Compression:

Stored size: 1.89 KB

Contents

require 'nokogiri'
require 'cgi'
class RemoteTable
  class Format
    # Mixin that iterates over the rows of an XML/HTML document parsed by Nokogiri.
    #
    # The including class must supply +t+ (exposing +properties+ and +local_file+),
    # +nokogiri_class+, +assume_utf8+, +remove_useless_characters!+ and
    # +transliterate_whole_file_to_utf8!+; none of them are defined in this module.
    module ProcessedByNokogiri
      # Yields one record per node matched by the :row_css or :row_xpath property.
      #
      # Each record is an Array of cell strings when the :output_class property is
      # ::Array, otherwise an ordered hash of header => cell string. Cell text has
      # runs of whitespace collapsed to a single space and is stripped.
      #
      # When the :headers property is :first_row, the first matched row supplies
      # the headers (blank cells dropped) and is not yielded.
      #
      # Rows whose cells are all blank are skipped unless :keep_blank_rows is set.
      #
      # Raises RuntimeError when neither :row_css nor :row_xpath is configured.
      # The local file is cleaned up in every case, including on error.
      def each
        unless t.properties.row_css || t.properties.row_xpath
          raise "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML"
        end
        remove_useless_characters!
        transliterate_whole_file_to_utf8!

        headers = t.properties.headers

        xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, 'UTF-8')
        rows = row_css? ? xml.css(t.properties.row_css) : xml.xpath(t.properties.row_xpath)
        rows.each do |row|
          cells = if column_css?
            row.css(t.properties.column_css)
          elsif column_xpath?
            row.xpath(t.properties.column_xpath)
          else
            # No column selector: the whole row node is treated as a single cell.
            [row]
          end
          values = cells.map { |cell| assume_utf8(cell.content.gsub(/\s+/, ' ').strip) }

          if headers == :first_row
            headers = values.select(&:present?)
            next
          end

          output = if t.properties.output_class == ::Array
            values
          else
            zip headers, values
          end

          # Strings are always truthy, so a bare `values.any?` would treat a row of
          # empty cells as non-blank; test the cell contents instead.
          if t.properties.keep_blank_rows || values.any?(&:present?)
            yield output
          end
        end
      ensure
        t.local_file.cleanup
      end

      private

      # True when rows are selected with a CSS selector (otherwise XPath is used).
      def row_css?
        !!t.properties.row_css
      end

      # True when cells within a row are selected with a CSS selector.
      def column_css?
        !!t.properties.column_css
      end

      # True when cells within a row are selected with an XPath expression.
      def column_xpath?
        !!t.properties.column_xpath
      end

      # Pairs +keys+ with +values+ positionally into an insertion-ordered hash.
      # Values beyond the last key are ignored; missing values become nil.
      # http://snippets.dzone.com/posts/show/406
      def zip(keys, values)
        hash = ::ActiveSupport::OrderedHash.new
        keys.zip(values) { |k, v| hash[k] = v }
        hash
      end

      # Returns the file contents with HTML entities unescaped and soft hyphens
      # removed.
      #
      # should we be doing this in ruby?
      def unescaped_xml_without_soft_hyphens
        str = ::CGI.unescapeHTML t.local_file.encoded_io.read
        # get rid of MS Office baddies: CGI.unescapeHTML leaves the named entity
        # &shy; untouched, so strip both the entity and the U+00AD character
        # (written as an escape because the literal character is invisible).
        str.gsub(/&shy;|\u00AD/, '')
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
remote_table-1.3.0 lib/remote_table/format/mixins/processed_by_nokogiri.rb