lib/remi/data_subjects/csv_file.rb in remi-0.3.1 vs lib/remi/data_subjects/csv_file.rb in remi-0.3.2
- old
+ new
@@ -76,10 +76,16 @@
logger.info "Converting #{filename} to a dataframe"
processed_filename = preprocess(filename)
csv_df = Daru::DataFrame.from_csv processed_filename, @csv_options
+ # Daru 0.1.4 doesn't add vectors if it's a headers-only file
+ if csv_df.vectors.size == 0
+ headers_df = Daru::DataFrame.from_csv processed_filename, @csv_options.merge(return_headers: true)
+ csv_df = Daru::DataFrame.new([], order: headers_df.vectors.to_a)
+ end
+
csv_df[@filename_field] = Daru::Vector.new([filename] * csv_df.size, index: csv_df.index) if @filename_field
if idx == 0
result_df = csv_df
else
result_df = result_df.concat csv_df
@@ -151,18 +157,27 @@
# @return [Hash] Csv options hash
attr_reader :csv_options
# Converts the dataframe to a CSV file stored in the local work directory.
# Fields that define a +:label+ are written under that label in the CSV
# header. The dataframe passed in by the caller is not renamed.
#
# @param dataframe [Remi::DataFrame] The dataframe to be encoded
# @return [Object] The path to the file
def encode(dataframe)
  logger.info "Writing CSV file to temporary location #{@working_file}"

  # Map each field name to its label (as a symbol), for labelled fields only.
  label_columns = fields.each_with_object({}) do |(name, metadata), labels|
    labels[name] = metadata[:label].to_sym if metadata[:label]
  end

  output = dataframe
  unless label_columns.empty?
    # rename_vectors alters its receiver, so apply the labels to a copy and
    # leave the caller's vector names as they were.
    # NOTE(review): relies on #dup returning an independent dataframe - confirm
    # for the dataframe implementation in use.
    output = dataframe.dup
    output.rename_vectors label_columns
  end

  output.write_csv @working_file, @csv_options
  @working_file
end
-
private
# Prepares the encoder state: picks a uniquely named working file inside
# +work_path+ and resolves the CSV options to use when writing.
# Positional arguments, extra keyword arguments and the block are accepted
# but not used by this method.
#
# @param work_path [String] Directory in which the working file is placed
# @param csv_options [Hash] Overrides merged on top of the class defaults
# @return [Hash] The merged CSV options
def init_csv_file_encoder(*args, work_path: Settings.work_dir, csv_options: {}, **kargs, &block)
  unique_name = SecureRandom.uuid
  @working_file = File.join(work_path, unique_name)

  defaults = self.class.default_csv_options
  @csv_options = defaults.merge(csv_options)
end