lib/lightgbm/booster.rb in lightgbm-0.1.1 vs lib/lightgbm/booster.rb in lightgbm-0.1.2
- old
+ new
@@ -1,59 +1,67 @@
module LightGBM
class Booster
+ attr_accessor :best_iteration, :train_data_name
+
def initialize(params: nil, train_set: nil, model_file: nil, model_str: nil)
@handle = ::FFI::MemoryPointer.new(:pointer)
if model_str
- out_num_iterations = ::FFI::MemoryPointer.new(:int)
- check_result FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, @handle)
+ model_from_string(model_str)
elsif model_file
out_num_iterations = ::FFI::MemoryPointer.new(:int)
check_result FFI.LGBM_BoosterCreateFromModelfile(model_file, out_num_iterations, @handle)
else
+ params ||= {}
+ set_verbosity(params)
check_result FFI.LGBM_BoosterCreate(train_set.handle_pointer, params_str(params), @handle)
end
# causes "Stack consistency error"
# ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
+
+ self.best_iteration = -1
+
+ # TODO get names when loaded from file
+ @name_valid_sets = []
end
def self.finalize(pointer)
-> { FFI.LGBM_BoosterFree(pointer) }
end
- # TODO handle name
def add_valid(data, name)
check_result FFI.LGBM_BoosterAddValidData(handle_pointer, data.handle_pointer)
+ @name_valid_sets << name
self # consistent with Python API
end
- def predict(input)
- raise TypeError unless input.is_a?(Array)
+ def current_iteration
+ out = ::FFI::MemoryPointer.new(:int)
+ check_result FFI::LGBM_BoosterGetCurrentIteration(handle_pointer, out)
+ out.read_int
+ end
- singular = input.first.is_a?(Array)
- input = [input] unless singular
-
- data = ::FFI::MemoryPointer.new(:float, input.count * input.first.count)
- data.put_array_of_float(0, input.flatten)
-
+ def dump_model(num_iteration: nil, start_iteration: 0)
+ num_iteration ||= best_iteration
+ buffer_len = 1 << 20
out_len = ::FFI::MemoryPointer.new(:int64)
- out_result = ::FFI::MemoryPointer.new(:double, input.count)
- parameter = ""
- check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 0, input.count, input.first.count, 1, 0, 0, parameter, out_len, out_result)
- out = out_result.read_array_of_double(out_len.read_int64)
-
- singular ? out : out.first
+ out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
+ check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
+ actual_len = out_len.read_int64
+ if actual_len > buffer_len
+ out_str = ::FFI::MemoryPointer.new(:string, actual_len)
+ check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
+ end
+ out_str.read_string
end
+ alias_method :to_json, :dump_model
- def save_model(filename)
- check_result FFI.LGBM_BoosterSaveModel(handle_pointer, 0, 0, filename)
- self # consistent with Python API
+ def eval_valid
+ @name_valid_sets.each_with_index.map { |n, i| inner_eval(n, i + 1) }.flatten(1)
end
- def update
- finished = ::FFI::MemoryPointer.new(:int)
- check_result FFI.LGBM_BoosterUpdateOneIter(handle_pointer, finished)
- finished.read_int == 1
+ def eval_train
+ inner_eval(train_data_name, 0)
end
def feature_importance(iteration: nil, importance_type: "split")
iteration ||= best_iteration
importance_type =
@@ -64,33 +72,22 @@
1
else
-1
end
- num_features = self.num_features
- out_result = ::FFI::MemoryPointer.new(:double, num_features)
+ num_feature = self.num_feature
+ out_result = ::FFI::MemoryPointer.new(:double, num_feature)
check_result FFI.LGBM_BoosterFeatureImportance(handle_pointer, iteration, importance_type, out_result)
- out_result.read_array_of_double(num_features)
+ out_result.read_array_of_double(num_feature)
end
- def num_features
- out = ::FFI::MemoryPointer.new(:int)
- check_result FFI.LGBM_BoosterGetNumFeature(handle_pointer, out)
- out.read_int
+ def model_from_string(model_str)
+ out_num_iterations = ::FFI::MemoryPointer.new(:int)
+ check_result FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, @handle)
+ self
end
- def current_iteration
- out = ::FFI::MemoryPointer.new(:int)
- check_result FFI::LGBM_BoosterGetCurrentIteration(handle_pointer, out)
- out.read_int
- end
-
- # TODO fix
- def best_iteration
- -1
- end
-
def model_to_string(num_iteration: nil, start_iteration: 0)
num_iteration ||= best_iteration
buffer_len = 1 << 20
out_len = ::FFI::MemoryPointer.new(:int64)
out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
@@ -101,27 +98,102 @@
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
end
out_str.read_string
end
- def to_json(num_iteration: nil, start_iteration: 0)
+ def num_feature
+ out = ::FFI::MemoryPointer.new(:int)
+ check_result FFI.LGBM_BoosterGetNumFeature(handle_pointer, out)
+ out.read_int
+ end
+ alias_method :num_features, :num_feature # legacy typo
+
+ def num_model_per_iteration
+ out = ::FFI::MemoryPointer.new(:int)
+ check_result FFI::LGBM_BoosterNumModelPerIteration(handle_pointer, out)
+ out.read_int
+ end
+
+ def num_trees
+ out = ::FFI::MemoryPointer.new(:int)
+ check_result FFI::LGBM_BoosterNumberOfTotalModel(handle_pointer, out)
+ out.read_int
+ end
+
+ # TODO support different prediction types
+ def predict(input, num_iteration: nil, **params)
+ raise TypeError unless input.is_a?(Array)
+
+ singular = !input.first.is_a?(Array)
+ input = [input] if singular
+
num_iteration ||= best_iteration
- buffer_len = 1 << 20
+ num_class ||= num_class()
+
+ data = ::FFI::MemoryPointer.new(:float, input.count * input.first.count)
+ data.put_array_of_float(0, input.flatten)
+
out_len = ::FFI::MemoryPointer.new(:int64)
- out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
- check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
- actual_len = out_len.read_int64
- if actual_len > buffer_len
- out_str = ::FFI::MemoryPointer.new(:string, actual_len)
- check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
- end
- out_str.read_string
+ out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
+ check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 0, input.count, input.first.count, 1, 0, num_iteration, params_str(params), out_len, out_result)
+ out = out_result.read_array_of_double(out_len.read_int64)
+ out = out.each_slice(num_class).to_a if num_class > 1
+
+ singular ? out.first : out
end
+ def save_model(filename, num_iteration: nil, start_iteration: 0)
+ num_iteration ||= best_iteration
+ check_result FFI.LGBM_BoosterSaveModel(handle_pointer, start_iteration, num_iteration, filename)
+ self # consistent with Python API
+ end
+
+ def update
+ finished = ::FFI::MemoryPointer.new(:int)
+ check_result FFI.LGBM_BoosterUpdateOneIter(handle_pointer, finished)
+ finished.read_int == 1
+ end
+
private
def handle_pointer
@handle.read_pointer
+ end
+
+ def eval_counts
+ out = ::FFI::MemoryPointer.new(:int)
+ check_result FFI::LGBM_BoosterGetEvalCounts(handle_pointer, out)
+ out.read_int
+ end
+
+ def eval_names
+ eval_counts ||= eval_counts()
+ out_len = ::FFI::MemoryPointer.new(:int)
+ out_strs = ::FFI::MemoryPointer.new(:pointer, eval_counts)
+ str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:string, 255) }
+ out_strs.put_array_of_pointer(0, str_ptrs)
+ check_result FFI.LGBM_BoosterGetEvalNames(handle_pointer, out_len, out_strs)
+ str_ptrs.map(&:read_string)
+ end
+
+ def inner_eval(name, i)
+ eval_names ||= eval_names()
+
+ out_len = ::FFI::MemoryPointer.new(:int)
+ out_results = ::FFI::MemoryPointer.new(:double, eval_names.count)
+ check_result FFI.LGBM_BoosterGetEval(handle_pointer, i, out_len, out_results)
+ vals = out_results.read_array_of_double(out_len.read_int)
+
+ eval_names.zip(vals).map do |eval_name, val|
+ higher_better = ["auc", "ndcg@", "map@"].any? { |v| eval_name.start_with?(v) }
+ [name, eval_name, val, higher_better]
+ end
+ end
+
+ def num_class
+ out = ::FFI::MemoryPointer.new(:int)
+ check_result FFI::LGBM_BoosterGetNumClasses(handle_pointer, out)
+ out.read_int
end
include Utils
end
end