lib/remi/data_subjects/csv_file.rb in remi-0.3.1 vs lib/remi/data_subjects/csv_file.rb in remi-0.3.2

- old
+ new

@@ -76,10 +76,16 @@
  logger.info "Converting #{filename} to a dataframe"
  processed_filename = preprocess(filename)
  csv_df = Daru::DataFrame.from_csv processed_filename, @csv_options
+ # Daru 0.1.4 doesn't add vectors if it's a headers-only file
+ if csv_df.vectors.size == 0
+   headers_df = Daru::DataFrame.from_csv processed_filename, @csv_options.merge(return_headers: true)
+   csv_df = Daru::DataFrame.new([], order: headers_df.vectors.to_a)
+ end
+
  csv_df[@filename_field] = Daru::Vector.new([filename] * csv_df.size, index: csv_df.index) if @filename_field
  if idx == 0
    result_df = csv_df
  else
    result_df = result_df.concat csv_df
@@ -151,18 +157,27 @@
  # @return [Hash] Csv options hash
  attr_reader :csv_options
  # Converts the dataframe to a CSV file stored in the local work directory.
+ # If labels are present write the CSV file with those headers but maintain
+ # the structure of the original dataframe
  #
  # @param dataframe [Remi::DataFrame] The dataframe to be encoded
  # @return [Object] The path to the file
  def encode(dataframe)
    logger.info "Writing CSV file to temporary location #{@working_file}"
+
+   label_columns = self.fields.reduce({}) { |h, (k, v)|
+     if v[:label]
+       h[k] = v[:label].to_sym
+     end
+     h
+   }
+   dataframe.rename_vectors label_columns
    dataframe.write_csv @working_file, @csv_options
    @working_file
  end
- private
  def init_csv_file_encoder(*args, work_path: Settings.work_dir, csv_options: {}, **kargs, &block)
    @working_file = File.join(work_path, SecureRandom.uuid)
    @csv_options = self.class.default_csv_options.merge(csv_options)
  end