require 'date'
require 'json'
require 'gooddata/extract'

module GoodData
  module Command
    class Datasets
      # List all data sets present in the project specified by the --project option
      #
      # == Usage
      #
      #  gooddata datasets --project <project id>
      #  gooddata datasets:list --project <project id>
      #
      # * --project - GoodData project identifier
      #
      def index
        connect
        with_project do |project_id|
          Project[project_id].datasets.each do |ds|
            puts "#{ds.uri}\t#{ds.identifier}\t#{ds.title}"
          end
        end
      end

      # Describe a data set. Currently, only CSV data sets are supported.
      #
      # The command prescans the data set, picks possible LDM types for its
      # fields and asks the user for confirmation.
      #
      # == Usage
      #
      #  gooddata datasets:describe --file-csv <file> --name <name> --output <output file>
      #
      # * --file-csv - path to the CSV file (required)
      # * --name - name of the data set (user will be prompted unless provided)
      # * --output - name of the output JSON file with the model description (user will be prompted unless provided)
      #
      def describe
        columns = ask_for_fields
        name    = extract_option('--name') || ask("Enter the dataset name")
        output  = extract_option('--output') || ask(
          "Enter path to the file where to save the model description",
          :default => "#{name}.json"
        )
        open output, 'w' do |f|
          f << JSON.pretty_generate(:title => name, :columns => columns) + "\n"
          f.flush
        end
      end
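
      # For reference, a minimal sketch of the model description JSON written
      # by +describe+ above and consumed by +apply+ and +load+ below. The data
      # set and column names are made up for illustration; the types are the
      # upcased Guesser types (CONNECTION_POINT, FACT, DATE or ATTRIBUTE):
      #
      #   {
      #     "title": "Employees",
      #     "columns": [
      #       { "title": "Id",     "name": "Id",     "type": "CONNECTION_POINT" },
      #       { "title": "Name",   "name": "Name",   "type": "ATTRIBUTE" },
      #       { "title": "Hired",  "name": "Hired",  "type": "DATE" },
      #       { "title": "Salary", "name": "Salary", "type": "FACT" }
      #     ]
      #   }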

      # Creates a server-side model based on a local model description. The
      # model description is read from a JSON file that can be generated using
      # the +datasets:describe+ command.
      #
      # == Usage
      #
      #  gooddata datasets:apply --project <project id> <data set config file>
      #
      # * --project - GoodData project identifier
      # * data set config - JSON file with the model description (possibly generated by the datasets:describe command)
      #
      def apply
        connect
        with_project do |project_id|
          cfg_file = args.shift rescue nil
          raise(CommandFailed, "Usage: #{$0} <data set config file>") unless cfg_file
          config = JSON.load open(cfg_file) rescue raise(CommandFailed, "Error reading dataset config file '#{cfg_file}'")
          objects = Project[project_id].add_dataset config['title'], config['columns']
          puts "Dataset #{config['title']} added to the project, #{objects['uris'].length} metadata objects affected"
        end
      end

      # Load a CSV file into an existing server-side data set
      #
      # == Usage
      #
      #  gooddata datasets:load --project <project id> <file> <data set config file>
      #
      # * --project - GoodData project identifier
      # * file - CSV file to load
      # * data set config - JSON file with the model description (possibly generated by the datasets:describe command)
      #
      def load
        connect
        with_project do |project_id|
          file, cfg_file = args
          raise(CommandFailed, "Usage: #{$0} datasets:load <file> <data set config file>") unless cfg_file
          config = JSON.load open(cfg_file) rescue raise(CommandFailed, "Error reading dataset config file '#{cfg_file}'")
          schema = Model::Schema.new config
          Project[project_id].upload file, schema
        end
      end

      private

      def with_project
        unless @project_id
          @project_id = extract_option('--project')
          raise CommandFailed.new("Project not specified, use the --project switch") unless @project_id
        end
        yield @project_id
      end

      def ask_for_fields
        guesser = Guesser.new create_dataset.read
        guess   = guesser.guess(1000)
        model   = []
        connection_point_set = false
        question_fmt = 'Select data type of column #%i (%s)'
        guesser.headers.each_with_index do |header, i|
          options = guess[header].map { |t| t.to_s }
          # a data set may contain at most one connection point
          options = options.select { |t| t != :connection_point.to_s } if connection_point_set
          type = ask question_fmt % [i + 1, header], :answers => options
          model.push :title => header, :name => header, :type => type.upcase
          connection_point_set = true if type == :connection_point.to_s
        end
        model
      end

      def create_dataset
        file = extract_option('--file-csv')
        return Extract::CsvFile.new(file) if file
        raise CommandFailed.new("Unknown data set. Please specify a data set using the --file-csv option (more supported data sources to come!)")
      end
    end

    ##
    # Utility class to guess the data types of a data stream by looking at its
    # first couple of rows
    #
    class Guesser
      TYPES_PRIORITY = [ :connection_point, :fact, :date, :attribute ]

      attr_reader :headers

      class << self
        def sort_types(types)
          types.sort do |x, y|
            TYPES_PRIORITY.index(x) <=> TYPES_PRIORITY.index(y)
          end
        end
      end

      def initialize(reader)
        @reader = reader
        # grab the header row first; fail early on an empty stream
        headers = reader.shift or raise "Empty data set"
        @headers = headers.map { |h| h.to_s }
        @pros = {}; @cons = {}; @seen = {}
        @headers.each do |h|
          @cons[h] = {}
          @pros[h] = {}
          @seen[h] = {}
        end
      end

      def guess(limit)
        count = 0
        while row = @reader.shift
          break unless !row.empty? && count < limit
          raise "%i fields in row %i, %i expected" % [row.size, count + 1, @headers.size] if row.size != @headers.size
          row.each_with_index do |value, j|
            header = @headers[j]
            number = check_number(header, value)
            date   = check_date(header, value)
            store_guess header, @pros => :attribute unless number || date
            hash_increment @seen[header], value
          end
          count += 1
        end
        # fields with unique values are connection point candidates
        @seen.each do |header, values|
          store_guess header, @pros => :connection_point if values.size == count
        end
        guess_result
      end

      private

      def guess_result
        result = {}
        @headers.each do |header|
          result[header] = Guesser::sort_types @pros[header].keys.select { |type| @cons[header][type].nil? }
        end
        result
      end

      def hash_increment(hash, key)
        if hash[key]
          hash[key] += 1
        else
          hash[key] = 1
        end
      end

      def check_number(header, value)
        return store_guess(header, @pros => [ :fact, :attribute ]) if value.nil? || value =~ /^[\+-]?\d*(\.\d*)?$/
        store_guess header, @cons => :fact
      end

      def check_date(header, value)
        return store_guess(header, @pros => [ :date, :attribute, :fact ]) if value.nil? || value == '0000-00-00'
        begin
          DateTime.parse value
          return store_guess(header, @pros => [ :date, :attribute ])
        rescue ArgumentError
        end
        store_guess header, @cons => :date
      end

      ##
      # Stores a guess about a given header.
      #
      # Returns true if the +@pros+ key is present in the guess, false otherwise
      #
      # === Parameters
      #
      # * +header+ - A header name
      # * +guess+ - A hash with optional +@pros+ and +@cons+ keys
      #
      def store_guess(header, guess)
        result = !guess[@pros].nil?
        [@pros, @cons].each do |hash|
          if guess[hash]
            guess[hash] = [ guess[hash] ] unless guess[hash].is_a? Array
            guess[hash].each { |type| hash_increment hash[header], type }
          end
        end
        result
      end
    end
  end
end
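
# A minimal usage sketch of GoodData::Command::Guesser, with a made-up file
# name. Any reader that responds to #shift and returns rows as arrays will do;
# the object returned by GoodData::Extract::CsvFile#read, as used in
# Datasets#ask_for_fields above, is one such reader:
#
#   reader  = GoodData::Extract::CsvFile.new('employees.csv').read
#   guesser = GoodData::Command::Guesser.new(reader)
#   guesser.guess(1000).each do |header, types|
#     puts "#{header}: #{types.join(', ')}"
#   end
#
# The candidate types for each header come back sorted by TYPES_PRIORITY, so
# e.g. Guesser::sort_types([:attribute, :fact]) yields [:fact, :attribute].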