lib/lightgbm/dataset.rb in lightgbm-0.1.1 vs lib/lightgbm/dataset.rb in lightgbm-0.1.2
- old
+ new
@@ -1,19 +1,30 @@
module LightGBM
class Dataset
attr_reader :data, :params
- def initialize(data, label: nil, weight: nil, params: nil)
+ def initialize(data, label: nil, weight: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
@data = data
+ # TODO stringify params
+ params ||= {}
+ params["categorical_feature"] ||= categorical_feature.join(",") if categorical_feature != "auto"
+ set_verbosity(params)
+
@handle = ::FFI::MemoryPointer.new(:pointer)
+ parameters = params_str(params)
+ reference = reference.handle_pointer if reference
if data.is_a?(String)
- check_result FFI.LGBM_DatasetCreateFromFile(data, params_str(params), nil, @handle)
+ check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
+ elsif used_indices
+ used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
+ used_row_indices.put_array_of_int32(0, used_indices)
+ check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
else
c_data = ::FFI::MemoryPointer.new(:float, data.count * data.first.count)
c_data.put_array_of_float(0, data.flatten)
- check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, data.count, data.first.count, 1, params_str(params), nil, @handle)
+ check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, data.count, data.first.count, 1, parameters, reference, @handle)
end
# Finalizer left disabled: registering it causes a "Stack consistency error"
# ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
set_field("label", label) if label
@@ -44,9 +55,19 @@
check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
end
# Calls LGBM_DatasetDumpText with this dataset's handle and `filename`,
# passing the native call's result through check_result.
# Presumably writes a text dump of the dataset to `filename` -- confirm
# against the LightGBM C API.
def dump_text(filename)
check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
+ end
+
# Builds a new Dataset derived from this one: calls Dataset.new with nil data,
# `self` as the reference, and `used_indices` forwarded unchanged.
# With nil data and used_indices set, the visible part of initialize copies
# the indices into an int32 buffer and calls LGBM_DatasetGetSubset.
#
# used_indices - forwarded as the used_indices: keyword; initialize calls
#                `count` on it and writes it as an array of int32 values.
# params       - optional; when nil (or false) this dataset's own params
#                are used instead.
#
# Returns the new Dataset instance.
+ def subset(used_indices, params: nil)
+ # categorical_feature passed via params
+ params ||= self.params
+ Dataset.new(nil,
+ params: params,
+ reference: self,
+ used_indices: used_indices
+ )
end
# Returns a lambda that, when called, passes `pointer` to LGBM_DatasetFree.
# The only visible use is the commented-out ObjectSpace.define_finalizer
# call in initialize.
# NOTE(review): the lambda does not check LGBM_DatasetFree's result.
def self.finalize(pointer)
-> { FFI.LGBM_DatasetFree(pointer) }
end