csv_file.rb in remi-0.3.2

- old
+ new

@@ -76,10 +76,16 @@
 
         logger.info "Converting #{filename} to a dataframe"
         processed_filename = preprocess(filename)
         csv_df = Daru::DataFrame.from_csv processed_filename, @csv_options
 
+        # Daru 0.1.4 doesn't add vectors if it's a headers-only file
+        if csv_df.vectors.size == 0
+          headers_df = Daru::DataFrame.from_csv processed_filename, @csv_options.merge(return_headers: true)
+          csv_df = Daru::DataFrame.new([], order: headers_df.vectors.to_a)
+        end
+
         csv_df[@filename_field] = Daru::Vector.new([filename] * csv_df.size, index: csv_df.index) if @filename_field
         if idx == 0
           result_df = csv_df
         else
           result_df = result_df.concat csv_df
@@ -151,18 +157,27 @@
 
     # @return [Hash] CSV options hash (handed to +write_csv+ when encoding)
     attr_reader :csv_options
 
     # Writes the dataframe to a CSV file at the encoder's working-file path.
     #
     # Every field whose metadata defines a +:label+ has its vector renamed to
     # that label (converted to a symbol) before the file is written, so the
     # CSV headers carry the labels instead of the field names.
     #
     # NOTE(review): +rename_vectors+ is called on the dataframe that was passed
     # in, not on a copy. If that call renames in place, the caller's dataframe
     # keeps the label names afterwards -- confirm this is intended.
     #
     # @param dataframe [Remi::DataFrame] The dataframe to be encoded
     # @return [Object] The path to the file
     def encode(dataframe)
       logger.info "Writing CSV file to temporary location #{@working_file}"

       # Map field name => label symbol, skipping fields that have no label.
       label_columns = fields.each_with_object({}) do |(name, metadata), labels|
         labels[name] = metadata[:label].to_sym if metadata[:label]
       end

       dataframe.rename_vectors label_columns
       dataframe.write_csv @working_file, @csv_options
       @working_file
     end
-
     private
     # Prepares the state used when encoding a CSV file.
     #
     # Stores a uniquely named path under +work_path+ in +@working_file+, and the
     # class-level default CSV options overlaid with +csv_options+ in
     # +@csv_options+. The remaining positional, keyword and block arguments are
     # accepted but not used in this method.
     #
     # @param work_path [String] directory that will hold the working file
     # @param csv_options [Hash] overrides merged on top of the class defaults
     def init_csv_file_encoder(*args, work_path: Settings.work_dir, csv_options: {}, **kargs, &block)
       unique_name = SecureRandom.uuid
       @working_file = File.join(work_path, unique_name)

       class_defaults = self.class.default_csv_options
       @csv_options = class_defaults.merge(csv_options)
     end