lib/slicing.rb in slicing-0.1.0 vs lib/slicing.rb in slicing-0.1.1
- old
+ new
@@ -7,10 +7,85 @@
class Base < Thor
check_unknown_options!
package_name 'slicing'
default_task :help
+ # desc :gsub, ""
+ # def gsub path, output, first, second
+ # CSV.foreach(path,:headers=> true, :encoding => "ISO8859-1:utf-8") do |row|
+ # puts row
+ # row.map {|n| n.gsub(first,second) if n !=nil}
+ # CSV.open(output, "a+") do |csv|
+ # csv << row
+ # end
+ # end
+ #
+ # end
+ #
+ # desc :trim, "clean up by removing rows with column value"
+ # def trim path, output#, name, value
+ # CSV.foreach(path) do |row|
+ # row.map {|n| n.strip! || n}
+ # CSV.open(output, "a+") do |csv|
+ # csv << row
+ # end
+ # end
+ # end
+
+ desc :clean, "clean up by removing rows with column value"
# Copies the CSV at +path+ to +output+, leaving out every data row whose
# +name+ column equals +value+. This implements what the command's
# description promises ("clean up by removing rows with column value");
# the method body was previously empty.
#
# path   - input CSV file; its first line is treated as the header row.
# output - destination CSV file, opened in append mode like the other commands.
# name   - header of the column to inspect.
# value  - rows whose +name+ field equals this string are dropped.
def clean(path, output, name, value)
  read_options = { headers: true, return_headers: true, encoding: "ISO8859-1:utf-8" }

  CSV.open(output, "a+") do |csv|
    CSV.foreach(path, **read_options) do |row|
      # The header row is always copied, even if a header happens to equal +value+.
      next if !row.header_row? && row[name] == value

      csv << row
    end
  end
end
+
+ desc :add, "add a header"
# Writes +headers+ as a single row to +output+, followed by every row of the
# CSV at +path+.
#
# The output file is opened once (append mode) instead of once per input
# row, and the header row is written even when the input has no rows.
#
# path    - input CSV file.
# output  - destination CSV file.
# headers - the header names, one argument per column.
def add(path, output, *headers)
  CSV.open(output, "a+") do |csv|
    csv << headers
    CSV.foreach(path) { |row| csv << row }
  end
end
+
+ desc :show, "show a specific row"
# Prints the fields of one row of the CSV at +path+, one field per line.
#
# path   - input CSV file.
# output - unused; kept so the command's arguments stay the same.
# start  - 1-based number of the row to print (converted with +to_i+).
#
# Prints nothing when +start+ is not a positive number or is past the last
# row. A non-positive +start+ returns immediately instead of scanning the
# whole file.
def show(path, output, start)
  target = start.to_i
  return if target < 1

  CSV.foreach(path).with_index(1) do |row, line_number|
    next unless line_number == target

    puts row
    break # stop reading once the requested row has been printed
  end
end
+
+ desc :list, "list unique items in a column"
# Prints the distinct values found in column +name+ of the CSV at +path+,
# then a "--" separator and the number of distinct values.
#
# path - input CSV file; its first line is treated as the header row.
# name - header of the column to list.
#
# When +name+ is not one of the headers a message is printed and nothing
# else happens; previously such a column was reported as a single blank item.
def list(path, name)
  table = CSV.read(path, headers: true, encoding: "ISO8859-1:utf-8")

  unless table.headers.include?(name)
    puts "#{name} is not a column name."
    return
  end

  unique_values = table[name].uniq
  puts unique_values
  puts "--"
  puts "#{unique_values.count} items"
end
+
+ desc :reduce, "reduce csv to smaller rows"
# Copies the first +start+ rows of the CSV at +path+ to +output+.
#
# path   - input CSV file.
# output - destination CSV file, opened once in append mode.
# start  - number of leading rows to keep (converted with +to_i+); zero or
#          a negative number copies nothing.
#
# The previous version had the reader and writer block variables swapped, so
# it appended the writer object to each row and never wrote anything.
def reduce(path, output, start)
  limit = start.to_i

  CSV.open(output, "a+") do |csv|
    CSV.foreach(path).with_index do |row, index|
      break if index >= limit # stop reading as soon as enough rows are copied

      csv << row
    end
  end
end
+
desc :sample, "create a sample output"
def sample path, output_path, size
file_csv = CSV.read(path,:headers=> true, :encoding => "ISO8859-1:utf-8")
sample = file_csv.sample(size)
CSV.open(output_path, "a+") do |csv|
@@ -37,21 +112,52 @@
csv << [value[0], value[1]]
end
end
end
-
desc :mask, "mask a particular column"
# Reads the whole CSV at `path` (headers enabled, header row returned as a
# row, encoding "ISO8859-1:utf-8") and, for every row read, appends one line
# to the file at `output_path`.
#
# NOTE(review): `column_name` is never used and `array` is not defined inside
# this method, so no masking of `row` happens here. Unless `array` is provided
# elsewhere in the file, the first `csv << array` presumably raises NameError.
# Confirm the intended masking and implement it.
# NOTE(review): the options are passed to CSV.read as a positional Hash rather
# than as keyword arguments; on Ruby 3+ this likely raises ArgumentError.
# Confirm against the Ruby versions this gem supports.
def mask path, column_name, output_path
original = CSV.read(path, { headers: true, return_headers: true, :encoding => "ISO8859-1:utf-8"})
# Output is opened in append mode, so existing content is kept.
CSV.open(output_path, 'a+') do |csv|
original.each do |row|
# NOTE(review): writes `array`, not `row`; see the notes above the method.
csv << array
end
end
end
+ desc :retain, "retain only these column"
# Copies the CSV at +path+ to +output+, keeping only the columns listed in
# +names+, in the order given. The first row of the input supplies the
# column names and is itself copied (projected) like every other row.
#
# path   - input CSV file.
# output - destination CSV file, opened in append mode.
# names  - column names to keep, one argument per column.
#
# If a name is not found in the first row, a message and the available
# column names are printed and the process exits before anything is written.
def retain(path, output, *names)
  # An empty input has no first row; fall back to an empty header list.
  header = CSV.foreach(path).first || []

  indices = names.map do |column|
    position = header.index(column)
    next position if position

    puts "#{column} is not a column name."
    puts "--"
    puts header
    exit
  end

  CSV.open(output, "a+") do |csv|
    CSV.foreach(path) { |row| csv << row.values_at(*indices) }
  end
end
+
desc :rm, "remove a column"
method_option :utf, type: :string, aliases: '-u', default: "ISO8859-1:utf-8"
method_option :headers, type: :boolean, aliases: '-h', default: true
method_option :rowsep, type: :string, aliases: '-r', default: nil
def rm path, column_name, output
@@ -88,10 +194,12 @@
# Prints a summary of the first row of +csv_file+: each field on its own
# line, the number of columns, and the row again via +print_header+.
#
# csv_file - input CSV file, read with encoding "ISO8859-1:utf-8".
#
# Prints nothing for an empty file. Only the first row is read; the method
# now returns normally instead of calling +exit+, so a caller keeps running.
def head(csv_file)
  first_row = CSV.foreach(csv_file, headers: false, encoding: "ISO8859-1:utf-8").first
  return unless first_row

  puts first_row
  puts "----"
  puts "#{first_row.count} columns"
  puts "----"
  print_header(first_row)
end
desc :unique, "calculate number of unique values in column"
@@ -106,10 +214,12 @@
# Prints the number of rows in +csv_file+, the number of columns in its
# first row, the first row itself, and the first row again via +print_header+.
#
# csv_file - input CSV file, read entirely into memory with encoding
#            "ISO8859-1:utf-8" and without header handling, so the first
#            line counts as a row.
#
# An empty file is reported as "0 rows 0 columns" instead of raising
# NoMethodError on the missing first row.
def count(csv_file)
  data = CSV.read(csv_file, headers: false, encoding: "ISO8859-1:utf-8")
  first_row = data.first
  column_count = first_row ? first_row.count : 0

  puts "#{data.count} rows #{column_count} columns"
  puts "---"
  puts first_row.to_s
  puts "---"
  print_header(first_row)
end
desc :subset, "create a subset of the data"
method_option :line, type: :numeric, aliases: '-l', default: 1000
def subset(csv_file, output)
@@ -148,10 +258,14 @@
# end
# end
private
# Prints the elements of +array+ on one line, separated by commas.
# Does nothing when +array+ is nil.
def print_header(array)
  return if array.nil?

  puts array.join(",")
end
+
def process_options headers, rowsep, utf
if headers == nil
headers = true
else
headers = headers
@@ -167,8 +281,12 @@
hash = Hash.new(0)
array.each{|key| hash[key] += 1}
hash
end
# Rewrites the current terminal line with a progress message of the form
# " <current> - <percent>% completed.".
#
# current - number of items processed so far.
# total   - total number of items; a total of zero is shown as 0%.
#
# The percentage is multiplied before dividing so integer arguments do not
# truncate to 0, and the message uses +current+ (the previous version
# referenced an undefined +index+).
def print_progress(current, total)
  percent = total.zero? ? 0 : current * 100 / total
  # The leading "\r" returns to the start of the line so each update overwrites the last.
  $stdout.write "\r #{current} - #{percent}% completed."
end
end
end