Sha256: c2bc91c2dc0d33e611e4261de8f27bae65db7234f695bd5526f633c1798250b5

Contents?: true

Size: 1.09 KB

Versions: 6

Compression:

Stored size: 1.09 KB

Contents

require 'delegate'
require 'csv'
require 'digest/md5'

module Peddler
  # Wraps a response object and parses its tab-delimited body.
  #
  # The wrapped object is expected to respond to +body+ and +headers+; every
  # other message is delegated to it through SimpleDelegator.
  #
  # @api private
  class FlatFileParser < SimpleDelegator
    # Options handed to CSV.parse. The quote character is set to the NUL byte;
    # presumably so that literal double quotes inside fields are not treated
    # as CSV quoting (see the linked discussion).
    #
    # http://stackoverflow.com/questions/8073920/importing-csv-quoting-error-is-driving-me-nuts
    OPTIONS = { col_sep: "\t", quote_char: "\x00", headers: true }.freeze

    attr_reader :content, :summary, :encoding

    # @param res [#body, #headers] the response to wrap
    # @param encoding [String, Encoding] the encoding the body content is
    #   tagged with before it is transcoded to UTF-8
    def initialize(res, encoding)
      super(res)
      @encoding = encoding
      extract_content
    end

    # Parses the content as a tab-delimited file with a header row.
    #
    # @yield [row] each parsed row, when a block is given
    # @return [CSV::Table, nil] whatever CSV.parse returns, or nil when there
    #   is no content
    def parse(&blk)
      return unless content

      # The options must be splatted as keywords: CSV.parse only accepts
      # keyword options, and Ruby 3 no longer converts a trailing positional
      # Hash into keywords.
      CSV.parse(scrub_content, **OPTIONS, &blk)
    end

    # @return [Hash, nil] the label/value pairs taken from the summary, or nil
    #   when the body does not start with a summary
    def records_count
      summarize if summary?
    end

    # @return [Boolean] whether the Content-MD5 header equals the Base64 MD5
    #   digest of the body
    def valid?
      headers['Content-MD5'] == Digest::MD5.base64digest(body)
    end

    private

    # Splits the body into summary and content on the first blank line when a
    # summary is present; otherwise the content is a copy of the whole body.
    def extract_content
      if summary?
        @summary, @content = body.split("\n\n")
      else
        @content = body.dup
      end
    end

    # Tags the content with the source encoding (in place) and returns a
    # UTF-8 transcoded copy.
    def scrub_content
      content.force_encoding(encoding).encode('UTF-8')
    end

    def summary?
      body.start_with?('Feed Processing Summary')
    end

    # Builds a Hash from the two summary lines that follow the title line;
    # each line is split into a label and a value on a double tab.
    def summarize
      Hash[summary.split("\n\t")[1, 2].map { |line| line.split("\t\t") }]
    end
  end
end

Version data entries

6 entries across 6 versions & 1 rubygems

Version Path
peddler-1.1.0 lib/peddler/flat_file_parser.rb
peddler-1.0.2 lib/peddler/flat_file_parser.rb
peddler-1.0.1 lib/peddler/flat_file_parser.rb
peddler-1.0.0 lib/peddler/flat_file_parser.rb
peddler-0.19.0 lib/peddler/flat_file_parser.rb
peddler-0.18.0 lib/peddler/flat_file_parser.rb