bin/csv-validator in csv-utils-0.3.1 vs bin/csv-validator in csv-utils-0.3.2
- old
+ new
@@ -1,9 +1,14 @@
#!/usr/bin/env ruby
require 'csv'
-require 'rchardet'
+begin
+ require 'rchardet'
+rescue LoadError
+ $stderr.puts 'gem install rchardet'
+ exit 1
+end
def utf8?(str)
str
.force_encoding('utf-8')
.valid_encoding?
@@ -19,29 +24,37 @@
# Returns the encoding guess for a single column value.
#
# Hands col to CharDet.detect and returns whatever that result stores under
# the 'encoding' key. CharDet is presumably supplied by the rchardet gem
# required at the top of the script - confirm.
def detect_encoding(col)
CharDet.detect(col)['encoding']
end
# Main script body.
#
# Reads the CSV file named by ARGV[0] row by row. Rows whose column count
# differs from the first (header) row are reported on stderr, as are columns
# for which utf8? returns false. When a second argument (ARGV[1]) is given,
# the header row and every row that had at least one column converted are
# written to that file.
csv = CSV.open(ARGV[0], 'rb')
# The output file is optional: out stays nil when ARGV[1] is absent, and every
# later use of out is guarded by "if out".
+out = CSV.open(ARGV[1], 'wb') if ARGV[1]
# The first row read is treated as the header and defines the expected column count.
headers = csv.shift
+out << headers if out
# Counts rows returned by csv.shift, header included, so the first data row is 2.
csv_lineno = 1
while (row = csv.shift)
csv_lineno += 1
# A column-count mismatch is only reported; the row still goes through the
# per-column check below.
unless row.size == headers.size
$stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
end
# Tracks whether any column of this row was replaced by a converted value.
+ converted = false
row.each_with_index do |col, idx|
# NOTE(review): the visible part of utf8? calls force_encoding directly on its
# argument; if CSV yields nil for an empty field this would raise - confirm.
next if utf8?(col)
$stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): none UTF-8 characters found in \"#{col}\""
# convert_to_utf8 is defined outside this view; a truthy return value is used
# as the converted string, a falsy one is reported as an unknown encoding.
if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
+ converted = true
# This success message goes to stdout, unlike the diagnostics above and below
# which go to stderr. detect_encoding(col) is invoked a second time here.
puts "row(#{csv_lineno}),col(#{idx + 1}): converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
# Replace the column in place so the row written to out carries the converted value.
+ row[idx] = col_utf8_encoded
else
$stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): unknown character encoding"
end
end
+
# Only rows with at least one converted column are written.
# NOTE(review): rows that needed no conversion are omitted, so the output file
# is not a full copy of the input - confirm this is intended.
+ out << row if out && converted
end
# NOTE(review): neither handle is closed if an exception is raised before
# reaching these lines.
csv.close
+out.close if out