lib/squib/api/data.rb in squib-0.6.0 vs lib/squib/api/data.rb in squib-0.7.0

- old
+ new

@@ -1,99 +1,100 @@ -require 'roo' -require 'csv' - -module Squib - - # Pulls Excel data from `.xlsx` files into a column-based hash - # - # Pulls the data into a Hash of arrays based on the columns. First row is assumed to be the header row. - # See the example `samples/excel.rb` in the [source repository](https://github.com/andymeneely/squib/tree/master/samples) - # - # @example - # # Excel file looks like this: - # # | h1 | h2 | - # # ------------ - # # | 1 | 2 | - # # | 3 | 4 | - # data = xlsx file: 'data.xlsx', sheet: 0 - # {'h1' => [1,3], 'h2' => [2,4]} - # - # @option opts file [String] the file to open. Must end in `.xlsx`. Opens relative to the current directory. - # @option opts sheet [Integer] (0) The zero-based index of the sheet from which to read. - # @return [Hash] a hash of arrays based on columns in the spreadsheet - # @api public - def xlsx(opts = {}) - opts = Squib::SYSTEM_DEFAULTS.merge(opts) - opts = Squib::InputHelpers.fileify(opts) - s = Roo::Excelx.new(opts[:file]) - s.default_sheet = s.sheets[opts[:sheet]] - data = {} - s.first_column.upto(s.last_column) do |col| - header = s.cell(s.first_row,col).to_s - data[header] = [] - (s.first_row + 1).upto(s.last_row) do |row| - cell = s.cell(row,col) - # Roo hack for avoiding unnecessary .0's on whole integers (https://github.com/roo-rb/roo/issues/139) - cell = s.excelx_value(row,col) if s.excelx_type(row,col) == [:numeric_or_formula, 'General'] - data[header] << cell - end#row - end#col - data - end#xlsx - module_function :xlsx - - # Pulls CSV data from `.csv` files into a column-based hash - # - # Pulls the data into a Hash of arrays based on the columns. First row is assumed to be the header row. - # See the example `samples/csv.rb` in the [source repository](https://github.com/andymeneely/squib/tree/master/samples) - # - # @example - # # File data.csv looks like this (without the comment symbols) - # # h1,h2 - # # 1,2 - # # 3,4 - # data = csv file: 'data.csv' - # => {'h1' => [1,3], 'h2' => [2,4]} - # - # Parsing uses Ruby's CSV, with options `{headers: true, converters: :numeric}` - # http://www.ruby-doc.org/stdlib-2.0/libdoc/csv/rdoc/CSV.html - # - # @option opts file [String] the CSV-formatted file to open. Opens relative to the current directory. - # @return [Hash] a hash of arrays based on columns in the table - # @api public - def csv(opts = {}) - opts = Squib::SYSTEM_DEFAULTS.merge(opts) - opts = Squib::InputHelpers.fileify(opts) - table = CSV.read(opts[:file], headers: true, converters: :numeric) - check_duplicate_csv_headers(table) - hash = Hash.new - table.headers.each do |header| - hash[header.to_s] ||= table[header] - end - return hash - end - module_function :csv - - # Check if the given CSV table has duplicate columns, and throw a warning - # @api private - def check_duplicate_csv_headers(table) - if table.headers.size != table.headers.uniq.size - dups = table.headers.select{|e| table.headers.count(e) > 1 } - Squib.logger.warn "CSV duplicated the following column keys: #{dups.join(',')}" - end - end - module_function :check_duplicate_csv_headers - - class Deck - - # Convenience call on deck goes to the module function - def xlsx(opts = {}) - Squib.xlsx(opts) - end - - # Convenience call on deck goes to the module function - def csv(opts = {}) - Squib.csv(opts) - end - - end -end +require 'roo' +require 'csv' +require 'squib/args/input_file' + +module Squib + + # Pulls Excel data from `.xlsx` files into a column-based hash + # + # Pulls the data into a Hash of arrays based on the columns. First row is assumed to be the header row. + # See the example `samples/excel.rb` in the [source repository](https://github.com/andymeneely/squib/tree/master/samples) + # + # @example + # # Excel file looks like this: + # # | h1 | h2 | + # # ------------ + # # | 1 | 2 | + # # | 3 | 4 | + # data = xlsx file: 'data.xlsx', sheet: 0 + # {'h1' => [1,3], 'h2' => [2,4]} + # + # @option opts file [String] the file to open. Must end in `.xlsx`. Opens relative to the current directory. + # @option opts sheet [Integer] (0) The zero-based index of the sheet from which to read. + # @return [Hash] a hash of arrays based on columns in the spreadsheet + # @api public + def xlsx(opts = {}) + input = Args::InputFile.new(file: 'deck.xlsx').load!(opts) + s = Roo::Excelx.new(input.file[0]) + s.default_sheet = s.sheets[input.sheet[0]] + data = {} + s.first_column.upto(s.last_column) do |col| + header = s.cell(s.first_row,col).to_s + data[header] = [] + (s.first_row + 1).upto(s.last_row) do |row| + cell = s.cell(row,col) + # Roo hack for avoiding unnecessary .0's on whole integers (https://github.com/roo-rb/roo/issues/139) + cell = s.excelx_value(row,col) if s.excelx_type(row,col) == [:numeric_or_formula, 'General'] + data[header] << cell + end#row + end#col + data + end#xlsx + module_function :xlsx + + # Pulls CSV data from `.csv` files into a column-based hash + # + # Pulls the data into a Hash of arrays based on the columns. First row is assumed to be the header row. + # See the example `samples/csv.rb` in the [source repository](https://github.com/andymeneely/squib/tree/master/samples) + # + # @example + # # File data.csv looks like this (without the comment symbols) + # # h1,h2 + # # 1,2 + # # 3,4 + # data = csv file: 'data.csv' + # => {'h1' => [1,3], 'h2' => [2,4]} + # + # Parsing uses Ruby's CSV, with options `{headers: true, converters: :numeric}` + # http://www.ruby-doc.org/stdlib-2.0/libdoc/csv/rdoc/CSV.html + # + # @option opts file [String] the CSV-formatted file to open. Opens relative to the current directory. + # @return [Hash] a hash of arrays based on columns in the table + # @api public + def csv(opts = {}) + file = Args::InputFile.new(file: 'deck.csv').load!(opts).file[0] + opts = Squib::SYSTEM_DEFAULTS.merge(opts) + # opts = Squib::InputHelpers.fileify(opts) + table = CSV.read(file, headers: true, converters: :numeric) + check_duplicate_csv_headers(table) + hash = Hash.new + table.headers.each do |header| + hash[header.to_s] ||= table[header] + end + return hash + end + module_function :csv + + # Check if the given CSV table has duplicate columns, and throw a warning + # @api private + def check_duplicate_csv_headers(table) + if table.headers.size != table.headers.uniq.size + dups = table.headers.select{|e| table.headers.count(e) > 1 } + Squib.logger.warn "CSV duplicated the following column keys: #{dups.join(',')}" + end + end + module_function :check_duplicate_csv_headers + + class Deck + + # Convenience call on deck goes to the module function + def xlsx(opts = {}) + Squib.xlsx(opts) + end + + # Convenience call on deck goes to the module function + def csv(opts = {}) + Squib.csv(opts) + end + + end +end