Sha256: 3183c99cddadb0530c277afcc379c7d4ec198080e73ba28329e301ae72a2303c

Contents?: true

Size: 1.47 KB

Versions: 1

Compression:

Stored size: 1.47 KB

Contents

require 'csv'


module Cevennes

  VERSION = '0.9.0'

  class << self

    def diff(id, csv0, csv1)

      h0 = hash(id, csv0)
      h1 = hash(id, csv1)

      ks0 = h0.delete(:keys)
      ks1 = h1.delete(:keys)

      d =
        h0
          .collect { |k, v|
            v1 = h1[k]
            if v1 == v
              [ '=', v, nil ]
            else
              [ v1 == nil ? '-' : '!', v, v1 ]
            end }
          .compact +
        (h1.keys - h0.keys)
          .collect { |k|
            v = h1[k]
            [ '+', nil, h1[k] ] }

      s = d.inject({}) { |h, (a, _, _)| h[a] = (h[a] || 0) + 1; h }
      s['l0'] = h0.length - 1
      s['l1'] = h1.length - 1

      [ [ 'keys', ks0, ks1 ], [ 'stats', s ] ] + d
    end

    protected

    def hash(id, csv)

      csva = ::CSV.parse(reencode(csv))
        .reject { |row| row.compact.empty? }
        .drop_while { |row| ! row.include?(id) }

      i = csva[0].index(id)

      csva[1..-1]
        .inject({ keys: csva[0] }) { |h, row|
          if row.compact.length > 1
            k = row[i]
            h[k] = row if k
          end
          h }
    end

    #def deflate(row)
    #  ::CSV.generate(encoding: 'UTF-8') { |csv| csv << row }.strip
    #end

    def reencode(s)

      #s = unzip(s) if s[0, 2] == 'PK'
        # no dependency on rubyzip

      %w[ Windows-1252 ISO-8859-1 UTF-8 ].each do |e|
        ss = s.force_encoding(e).encode('UTF-8') rescue nil
        break ss if ss
        nil
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
cevennes-0.9.0 lib/cevennes.rb