Sha256: 684e572aae2f42e7c91e2c911c090734a52315446f2a805f82ca0922aa9a5049

Contents?: true

Size: 1.09 KB

Versions: 6

Compression:

Stored size: 1.09 KB

Contents

require 'delegate'
require 'csv'
require 'digest/md5'

module Peddler
  # Wraps a response object and parses its tab-delimited body.
  #
  # Anything not defined here is forwarded to the wrapped response through
  # SimpleDelegator. This class itself relies on the response answering
  # +body+ (a String) and +headers+ (indexable by 'Content-MD5').
  #
  # A body that starts with 'Feed Processing Summary' is treated as a summary
  # block followed, after the first blank line, by the tab-delimited content.
  # Any other body is treated as content only.
  #
  # @api private
  class FlatFileParser < SimpleDelegator
    # http://stackoverflow.com/questions/8073920/importing-csv-quoting-error-is-driving-me-nuts
    #
    # Tab-separated, first row is the header. The quote character is set to
    # NUL, presumably so that literal double quotes inside a field are not
    # interpreted as CSV quoting (see the link above).
    OPTIONS = { col_sep: "\t", quote_char: "\x00", headers: true }.freeze

    attr_reader :content, :summary, :encoding

    # @param res [#body, #headers] the response to wrap
    # @param encoding [String, Encoding] the encoding the content bytes are
    #   actually in; used to transcode to UTF-8 before parsing
    def initialize(res, encoding)
      super(res)
      @encoding = encoding
      extract_content
    end

    # Parses the content as tab-delimited data with a header row.
    #
    # @return [CSV::Table, nil] nil when the body carried no content section
    def parse
      # Options must be splatted into keywords: since Ruby 3.0 a positional
      # Hash is no longer converted to keyword arguments and CSV.parse raises
      # ArgumentError.
      CSV.parse(scrub_content, **OPTIONS) if content
    end

    # Extracts the key/value lines of the summary block.
    #
    # @return [Hash{String => String}, nil] up to two summary entries keyed
    #   by their label, or nil when the body has no summary
    def records_count
      summarize if has_summary?
    end

    # Checks the body against the Content-MD5 response header.
    #
    # @return [Boolean] true when the header equals the Base64 MD5 digest of
    #   the body
    def valid?
      headers['Content-MD5'] == Digest::MD5.base64digest(body)
    end

    private

    # Splits the body into @summary and @content, or copies the whole body
    # into @content when there is no summary.
    def extract_content
      if has_summary?
        # Limit the split to two parts: only the first blank line separates
        # the summary from the content. Without the limit, a blank line
        # inside the content would truncate it.
        @summary, @content = body.split("\n\n", 2)
      else
        @content = body.dup
      end
    end

    # Retags the content bytes with the declared encoding (in place) and
    # returns a UTF-8 transcoded copy.
    def scrub_content
      content.force_encoding(encoding).encode('UTF-8')
    end

    def has_summary?
      body.start_with?('Feed Processing Summary')
    end

    # Summary lines are introduced by "\n\t" and separate label from value
    # with a double tab. The first chunk is the title and is skipped; the
    # next two chunks are turned into label => value pairs.
    def summarize
      Hash[summary.split("\n\t")[1, 2].map { |line| line.split("\t\t") }]
    end
  end
end

Version data entries

6 entries across 6 versions & 1 rubygem

Version Path
peddler-0.8.0 lib/peddler/flat_file_parser.rb
peddler-0.7.11 lib/peddler/flat_file_parser.rb
peddler-0.7.10 lib/peddler/flat_file_parser.rb
peddler-0.7.9 lib/peddler/flat_file_parser.rb
peddler-0.7.8 lib/peddler/flat_file_parser.rb
peddler-0.7.7 lib/peddler/flat_file_parser.rb