Sha256: 346c47657fb7d4925c1b794bfba91e29b46715366f12449d2d6a9eda655a534d
Contents?: true
Size: 1.54 KB
Versions: 1
Compression:
Stored size: 1.54 KB
Contents
#!/usr/bin/env ruby
# frozen_string_literal: true

# Splits a CSV/TSV file into numbered part files holding at most --rows data
# rows each. Unless --no-header is given, the first row of the input is treated
# as a header and written at the top of every part file.
#
# Part files are created next to the input file; see get_split_file_name for
# the naming scheme.

require 'optparse'

options = {
  rows: 250_000,
  no_header: false
}

OptionParser.new do |opts|
  opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file>'

  opts.on('-h', '--help', 'Prints this help') do
    puts opts
    exit
  end

  opts.on('-r', '--rows ROWS', Integer, 'Max number of rows per a CSV') do |v|
    options[:rows] = v
  end

  opts.on('--no-header', 'CSV file has no header') do
    options[:no_header] = true
  end
end.parse!

file = ARGV[0] || raise('no CSV file specified')

# A zero row limit would divide by zero below and a negative one would produce
# meaningless splits, so reject both up front with a clear message.
raise ArgumentError, '--rows must be a positive integer' unless options[:rows].positive?

require 'csv-utils'

# Matches a trailing .csv / .tsv extension, case-insensitively.
SPLIT_FILE_EXTENSION = /\.(?:csv|tsv)$/i

# Builds the name of one part file.
#
# @param file [String] path of the file being split
# @param num_files [Integer] 1-based index of the part being written
# @param total_files [Integer] total number of parts that will be written
# @return [String] "<base>-part-N-of-M<ext>" when +file+ ends in .csv/.tsv,
#   otherwise "<file>-part-N"
def get_split_file_name(file, num_files, total_files)
  return file + "-part-#{num_files}" unless file.match?(SPLIT_FILE_EXTENSION)

  file.sub(SPLIT_FILE_EXTENSION) { |ext| "-part-#{num_files}-of-#{total_files}#{ext}" }
end

csv_options = CSVUtils::CSVOptions.new(file)
csv = CSV.open(
  file,
  'rb',
  col_sep: csv_options.col_separator,
  liberal_parsing: true
)
out = nil

begin
  headers = options[:no_header] ? nil : csv.shift

  # First pass: count the data rows so each part name can carry the total
  # number of parts.
  total_rows = 0
  total_rows += 1 while csv.shift

  # Second pass starts from the top again; skip the header row if one was read.
  csv.rewind
  csv.shift if headers

  total_files, remainder = total_rows.divmod(options[:rows])
  total_files += 1 if remainder.positive?

  num_rows = 0
  num_files = 0

  while (row = csv.shift)
    # Roll over to a new part file on the first row and whenever the current
    # part has reached the row limit.
    if out.nil? || num_rows >= options[:rows]
      out&.close
      num_rows = 0
      num_files += 1
      out = CSV.open(
        get_split_file_name(file, num_files, total_files),
        'wb',
        col_sep: csv_options.col_separator
      )
      out << headers if headers
    end

    out << row
    num_rows += 1
  end
ensure
  # `out` stays nil when the input has no data rows, so close it only if a
  # part file was actually opened; always release the input handle.
  out&.close
  csv.close
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
csv-utils-0.3.22 | bin/csv-splitter |