Sha256: 052e835698fff3e527713155d017a035d948c2d98c52d505a9bc86d45597af1b

Contents?: true

Size: 1.29 KB

Versions: 5

Compression:

Stored size: 1.29 KB

Contents

# frozen_string_literal: true

require 'delegate'
require 'csv'
require 'digest/md5'
require 'peddler/headers'

module Peddler
  # Wraps a response whose body is a tab-delimited flat file and splits that
  # body into the tabular content and, when one is detected, a leading summary
  # section.
  #
  # Methods not defined here (e.g. +body+, +body=+) are resolved through the
  # included Headers module or delegated to the wrapped response.
  #
  # @!visibility private
  class FlatFileParser < SimpleDelegator
    include Headers

    # The NUL quote character is a workaround for CSV quoting errors on data
    # that contains literal quote characters; see:
    # http://stackoverflow.com/questions/8073920/importing-csv-quoting-error-is-driving-me-nuts
    OPTIONS = { col_sep: "\t", quote_char: "\x00", headers: true }.freeze
    private_constant :OPTIONS

    # @return [String] the UTF-8 encoded tabular part of the body
    # @return [String, nil] the summary section preceding the content, if any
    attr_reader :content, :summary

    # @param res [Object] the response to wrap; it must respond to the body
    #   and header accessors used below
    # @param encoding [String, Encoding] fallback encoding applied to the body
    #   when the response does not declare a charset
    def initialize(res, encoding)
      super(res)
      scrub_body!(encoding)
      extract_content_and_summary
    end

    # Parses the content as tab-separated values with a header row.
    #
    # @yield [row] each parsed row, when a block is given
    # @return [Object, nil] whatever CSV.parse returns, or nil when the
    #   content is empty
    def parse(&blk)
      CSV.parse(content, **OPTIONS, &blk) unless content.empty?
    end

    # Returns the key/value pairs found in the summary section.
    #
    # @return [Hash{String => String}, nil] summary labels mapped to their
    #   values, or nil when the body had no summary section
    def records_count
      summarize if summary
    end

    # Checks the body against the Content-MD5 header.
    #
    # @return [Boolean] true when the header equals the Base64 MD5 digest of
    #   the body
    def valid?
      headers['Content-MD5'] == Digest::MD5.base64digest(body)
    end

    private

    # Re-labels a non-UTF-8 body with the charset reported by
    # +content_charset+, falling back to +encoding+. The body is duplicated
    # first so the original string object is left untouched.
    def scrub_body!(encoding)
      return if body.encoding == Encoding::UTF_8

      self.body = body.dup.force_encoding(content_charset || encoding)
    end

    # Transcodes the body to UTF-8 (replacing invalid or undefined characters)
    # and, if it contains a double-tab line followed by a blank line, treats
    # everything before the first blank line as the summary.
    #
    # NOTE(review): the detection pattern is not anchored to the start of the
    # body, while the split always happens at the first blank line -- confirm
    # that is intended for bodies without a summary.
    def extract_content_and_summary
      @content = body.encode('UTF-8', invalid: :replace, undef: :replace)
      return unless @content.match?(/\t\t.*\n\n/)

      @summary, @content = @content.split("\n\n", 2)
    end

    # Builds a hash from up to two summary lines following the first segment;
    # each line is split into key and value on a double tab.
    def summarize
      # drop/first picks the same elements as [1, 2] but yields [] rather
      # than nil when the summary splits into no segments at all (e.g. an
      # empty summary), so the result is {} instead of a NoMethodError.
      lines = summary.split("\n\t").drop(1).first(2)

      # Hash[] is kept deliberately: unlike to_h it tolerates a line without
      # a double tab by mapping the lone key to nil.
      Hash[lines.map { |line| line.split("\t\t") }]
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
peddler-2.4.4 lib/peddler/flat_file_parser.rb
peddler-2.4.3 lib/peddler/flat_file_parser.rb
peddler-2.4.1 lib/peddler/flat_file_parser.rb
peddler-2.4.0 lib/peddler/flat_file_parser.rb
peddler-2.3.0 lib/peddler/flat_file_parser.rb