lib/lightgbm/dataset.rb in lightgbm-0.1.4 vs lib/lightgbm/dataset.rb in lightgbm-0.1.5
- old
+ new
@@ -1,15 +1,17 @@
module LightGBM
class Dataset
attr_reader :data, :params
- def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
+ def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
@data = data
# TODO stringify params
params ||= {}
- params["categorical_feature"] ||= categorical_feature.join(",") if categorical_feature != "auto"
+ if categorical_feature != "auto" && categorical_feature.any?
+ params["categorical_feature"] ||= categorical_feature.join(",")
+ end
set_verbosity(params)
@handle = ::FFI::MemoryPointer.new(:pointer)
parameters = params_str(params)
reference = reference.handle_pointer if reference
@@ -24,29 +26,31 @@
nrow = data.row_count
ncol = data.column_count
flat_data = data.to_a.flatten
elsif daru?(data)
nrow, ncol = data.shape
- flat_data = data.each_vector.map(&:to_a).flatten
+ flat_data = data.map_rows(&:to_a).flatten
elsif narray?(data)
nrow, ncol = data.shape
flat_data = data.flatten.to_a
else
nrow = data.count
ncol = data.first.count
flat_data = data.flatten
end
+ handle_missing(flat_data)
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
c_data.put_array_of_float(0, flat_data)
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
end
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
self.label = label if label
self.weight = weight if weight
self.group = group if group
+ self.feature_names = feature_names if feature_names
end
# Reads the dataset's "label" field.
#
# @return [Object] whatever the `field` helper returns for "label"
def label
  field_name = "label"
  field(field_name)
end
@@ -57,18 +61,34 @@
# Stores the given values as the dataset's "label" field.
#
# @param label [Object] values handed unchanged to `set_field`
def label=(label)
  field_name = "label"
  set_field(field_name, label)
end
+ def feature_names
+ # must preallocate space
+ num_feature_names = ::FFI::MemoryPointer.new(:int)
+ out_strs = ::FFI::MemoryPointer.new(:pointer, 1000)
+ str_ptrs = 1000.times.map { ::FFI::MemoryPointer.new(:string, 255) }
+ out_strs.put_array_of_pointer(0, str_ptrs)
+ check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, out_strs, num_feature_names)
+ str_ptrs[0, num_feature_names.read_int].map(&:read_string)
+ end
+
# Stores the given values as the dataset's "weight" field.
#
# @param weight [Object] values handed unchanged to `set_field`
def weight=(weight)
  field_name = "weight"
  set_field(field_name, weight)
end
# Stores the given values as the dataset's "group" field, asking `set_field`
# to treat them as :int32.
#
# @param group [Object] values handed unchanged to `set_field`
def group=(group)
  field_name = "group"
  set_field(field_name, group, type: :int32)
end
+ def feature_names=(feature_names)
+ c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
+ c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
+ check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
+ end
+
# Asks the native library for the dataset's data count.
#
# @return [Integer] the int written by LGBM_DatasetGetNumData
def num_data
  count_ptr = ::FFI::MemoryPointer.new(:int)
  status = FFI.LGBM_DatasetGetNumData(handle_pointer, count_ptr)
  check_result(status)
  count_ptr.read_int
end
@@ -127,21 +147,9 @@
else
c_data = ::FFI::MemoryPointer.new(:float, data.count)
c_data.put_array_of_float(0, data)
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
end
- end
-
- def matrix?(data)
- defined?(Matrix) && data.is_a?(Matrix)
- end
-
- def daru?(data)
- defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
- end
-
- def narray?(data)
- defined?(Numo::NArray) && data.is_a?(Numo::NArray)
end
include Utils
end
end