bin/csv-validator in csv-utils-0.3.1 vs bin/csv-validator in csv-utils-0.3.2

- old
+ new

@@ -1,9 +1,14 @@ #!/usr/bin/env ruby require 'csv' -require 'rchardet' +begin + require 'rchardet' +rescue LoadError + $stderr.puts 'gem install rchardet' + exit 1 +end def utf8?(str) str .force_encoding('utf-8') .valid_encoding? @@ -19,29 +24,37 @@ def detect_encoding(col) CharDet.detect(col)['encoding'] end csv = CSV.open(ARGV[0], 'rb') +out = CSV.open(ARGV[1], 'wb') if ARGV[1] headers = csv.shift +out << headers if out csv_lineno = 1 while (row = csv.shift) csv_lineno += 1 unless row.size == headers.size $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}" end + converted = false row.each_with_index do |col, idx| next if utf8?(col) $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): none UTF-8 characters found in \"#{col}\"" if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col))) + converted = true puts "row(#{csv_lineno}),col(#{idx + 1}): converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\"" + row[idx] = col_utf8_encoded else $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): unknown character encoding" end end + + out << row if out && converted end csv.close +out.close if out