lib/cevennes.rb in cevennes-1.1.0 vs lib/cevennes.rb in cevennes-1.1.1

- old
+ new

@@ -1,12 +1,13 @@
+# frozen_string_literal: true
 require 'csv'
 module Cevennes
-  VERSION = '1.1.0'
+  VERSION = '1.1.1'
   class << self
     def diff(id, csv0, csv1, opts={})
@@ -47,33 +48,32 @@
     def strip(row)
       row.collect { |cell| cell.is_a?(String) ? cell.strip : cell }
     end
-    DOWNCASE_IF_POSSIBLE =
-      lambda { |x| x.respond_to?(:downcase) ? x.downcase : x }
-    IDENTITY =
-      lambda { |x| x }
+    DOWNCASE = lambda { |x| x.respond_to?(:downcase) ? x.downcase : x }
+    IDENTITY = lambda { |x| x }
     def hash(version, id, csv, opts)
-      d = opts[:ignore_key_case] ? DOWNCASE_IF_POSSIBLE : IDENTITY
+      d = opts[:ignore_key_case] ? DOWNCASE : IDENTITY
+      did = d[id]
       csva = ::CSV.parse(reencode(csv))
         .each_with_index.collect { |row, i| [ 1 + i, strip(row) ] }
         .reject { |i, row| row.compact.empty? }
-        .drop_while { |i, row| ! row.find { |cell| d[cell] == id } }
+        .drop_while { |i, row| ! row.find { |cell| d[cell] == did } }
       fail ::IndexError.new("id #{id.inspect} not found in #{version} CSV") \
         if csva.empty?
       csva[0][1] = opts[:ignore_key_case] ?
-        csva[0][1].collect { |c| DOWNCASE_IF_POSSIBLE[c] } :
+        csva[0][1].collect { |c| DOWNCASE[c] } :
         csva[0][1]
-      idi = csva[0][1].index(id)
+      idi = csva[0][1].index(did)
       csva[1..-1]
         .inject({ keys: csva[0] }) { |h, (i, row)|
           if row.compact.length > 1
             k = row[idi]
@@ -84,19 +84,26 @@
     #def deflate(row)
     #  ::CSV.generate(encoding: 'UTF-8') { |csv| csv << row }.strip
     #end
+    ENCODINGS = %w[ Windows-1252 ISO-8859-1 UTF-8 ].freeze
+
     def reencode(s)
       #s = unzip(s) if s[0, 2] == 'PK'
         # no dependency on rubyzip
-      %w[ Windows-1252 ISO-8859-1 UTF-8 ].each do |e|
-        ss = s.force_encoding(e).encode('UTF-8') rescue nil
-        break ss if ss
-        nil
+      #return s if s.encoding == Encoding::UTF_8
+        # NO! have to force_encoding for UTF-8 as well!
+
+      s = s.dup if s.frozen?
+
+      ENCODINGS.each do |e|
+        (return s.force_encoding(e).encode('UTF-8')) rescue nil
       end
+
+      nil
     end
   end
 end