Sha256: c1e48acfc2d57c865336d3c734a7ee318197bed9ae30e8ad5f9cbf7845f806d1
Contents?: true
Size: 1.87 KB
Versions: 2
Compression:
Stored size: 1.87 KB
Contents
module DF #:nodoc:
  # Pre-processing helpers that derive new columns from categorical ones.
  #
  # The methods below call render_column and append! on self, and
  # to_underscore_sym on column names. None of these are defined in this
  # module; they are expected to come from the including class and its
  # supporting extensions.
  module PreProcess #:nodoc:

    # A weird name. Appends one boolean column per category of each given
    # column, marking each row according to the category of its value.
    #
    # Arguments are column names, optionally mixed with option Hashes.
    # Every Hash argument is merged into a single options Hash and is not
    # treated as a column name.
    #
    # Options:
    #   :allow_overlap - when truthy, a row is marked true for every
    #                    category listed by values.all_categories(row);
    #                    otherwise a row is marked true only where its
    #                    entry in values.category_map equals the category.
    #
    # Each new column is named "<col>_<category>" (as a Symbol), where
    # <col> is the name exactly as passed in.
    #
    # Returns the Array of column names that were processed.
    def j_binary_ize!(*columns)
      # Allows to mix a hash with the columns.
      option_hashes, names = columns.partition { |e| e.is_a?(Hash) }
      options = option_hashes.inject({}) { |merged, e| merged.merge!(e) }
      overlap = options[:allow_overlap]

      # Generates new columns
      names.each do |col|
        values = render_column(col.to_underscore_sym)
        categories = values.categories

        # What each row belongs to does not depend on the category being
        # tested, so work it out once per column instead of once per category.
        memberships =
          if overlap
            values.map { |e| values.all_categories(e) }
          else
            values.category_map
          end

        categories.each do |category|
          full_name = "#{col}_#{category}".to_sym
          flags =
            if overlap
              memberships.map { |row_categories| row_categories.include?(category) }
            else
              memberships.map { |row_category| row_category == category }
            end
          append!(full_name, flags)
        end
      end
    end

    # Adds a column named after "numerical <column name>" (passed through
    # to_underscore_sym) holding an indicator encoding of the nominal data.
    #
    # Every row becomes an Array of 0s, as wide as the number of distinct
    # categories found in the column, with a single 1 at the position of
    # that row's category. Positions follow the order in which categories
    # first appear in the column.
    #
    # Returns the Array of column names that were processed.
    def numericize!(*columns)
      columns.each do |col|
        values = render_column(col.to_underscore_sym)
        value_categories = values.map { |v| values.category(v) }

        # Category => position, numbered by first appearance.
        positions = {}
        value_categories.uniq.each_with_index { |c, i| positions[c] = i }
        width = positions.size

        # The categories were already looked up above; reuse them rather
        # than asking for each row's category a second time.
        new_values = value_categories.map do |c|
          row = Array.new(width, 0)
          row[positions[c]] = 1
          row
        end

        append!("numerical #{col}".to_underscore_sym, new_values)
      end
    end

  end
end

class DataFrame
  include DF::PreProcess
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
davidrichards-data_frame-0.0.19 | lib/data_frame/core/pre_process.rb |
davidrichards-data_frame-0.0.20 | lib/data_frame/core/pre_process.rb |