lib/xgb/booster.rb in xgb-0.1.0 vs lib/xgb/booster.rb in xgb-0.1.1

- old
+ new

module Xgb
  # Ruby wrapper around a native XGBoost booster handle. Every operation is
  # forwarded to the C API bindings exposed by Xgb::FFI and the returned status
  # code is passed to +check_result+ (defined outside this excerpt).
  class Booster
    # Importance types accepted by #score.
    IMPORTANCE_TYPES = %w[weight gain cover total_gain total_cover].freeze
    private_constant :IMPORTANCE_TYPES

    attr_accessor :best_iteration, :feature_names

    # Creates a native booster, optionally loads a saved model, then applies params.
    #
    # @param params [Enumerable, nil] key/value pairs forwarded to #set_param
    # @param model_file [String, nil] path of a saved model to load
    def initialize(params: nil, model_file: nil)
      @handle = ::FFI::MemoryPointer.new(:pointer)
      check_result FFI.XGBoosterCreate(nil, 0, @handle)
      check_result FFI.XGBoosterLoadModel(handle_pointer, model_file) if model_file

      self.best_iteration = 0
      set_param(params)
    end

    # Runs one boosting iteration on +dtrain+.
    #
    # @param dtrain [#handle_pointer] training matrix
    # @param iteration [Integer] index of the current iteration
    def update(dtrain, iteration)
      check_result FFI.XGBoosterUpdateOneIter(handle_pointer, iteration, dtrain.handle_pointer)
    end

    # Evaluates the model on each +[matrix, name]+ pair.
    #
    # @param evals [Array<Array>] pairs of (object responding to #handle_pointer, name)
    # @param iteration [Integer] index of the current iteration
    # @return [String] the evaluation message produced by the native library
    def eval_set(evals, iteration)
      dmats = ::FFI::MemoryPointer.new(:pointer, evals.size)
      dmats.write_array_of_pointer(evals.map { |entry| entry[0].handle_pointer })

      # Only the raw addresses are copied into +evnames+, so the string buffers
      # must stay referenced from this frame until the native call has returned;
      # otherwise the GC is free to release them while the C side still reads them.
      name_ptrs = evals.map { |entry| ::FFI::MemoryPointer.from_string(entry[1].to_s) }
      evnames = ::FFI::MemoryPointer.new(:pointer, evals.size)
      evnames.write_array_of_pointer(name_ptrs)

      out_result = ::FFI::MemoryPointer.new(:pointer)

      check_result FFI.XGBoosterEvalOneIter(handle_pointer, iteration, dmats, evnames, evals.size, out_result)

      out_result.read_pointer.read_string
    end

    # Sets one parameter (+params+, +value+) or every pair of an Enumerable.
    #
    # @param params [Enumerable, #to_s] pairs to set, or a single parameter name
    # @param value [#to_s, nil] value used when +params+ is a single name
    def set_param(params, value = nil)
      if params.is_a?(Enumerable)
        params.each do |k, v|
          check_result FFI.XGBoosterSetParam(handle_pointer, k.to_s, v.to_s)
        end
      else
        # NOTE(review): these lines fall in the gap between the two diff hunks;
        # reconstructed from the (params, value = nil) signature - confirm
        # against the full file.
        check_result FFI.XGBoosterSetParam(handle_pointer, params.to_s, value.to_s)
      end
    end

    # Predicts for every row of +data+.
    #
    # @param data [#handle_pointer, #num_row] matrix to predict on
    # @param ntree_limit [Integer, nil] forwarded to the native call; nil means 0
    # @return [Array<Float>, Array<Array<Float>>] a flat array when there is one
    #   value per row, otherwise one array of values per row
    def predict(data, ntree_limit: nil)
      ntree_limit ||= 0
      # The C API reports lengths as bst_ulong (64-bit unsigned), so a
      # platform-sized :ulong would be too small where unsigned long is 32 bits.
      out_len = ::FFI::MemoryPointer.new(:uint64)
      out_result = ::FFI::MemoryPointer.new(:pointer)
      check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, out_len, out_result)
      out = out_result.read_pointer.read_array_of_float(out_len.read_uint64)

      rows = data.num_row
      return out if rows.zero? # nothing to group, and avoids dividing by zero

      num_class = out.size / rows
      out = out.each_slice(num_class).to_a if num_class > 1
      out
    end

    # Saves the model to +fname+.
    def save_model(fname)
      check_result FFI.XGBoosterSaveModel(handle_pointer, fname)
    end

    # Dumps the model, one string per tree.
    #
    # @param fmap [String] feature map argument forwarded to the native call
    # @param with_stats [Boolean] whether split statistics are included
    # @param dump_format [String] forwarded to the native call ("text" by default)
    # @return [Array<String>]
    def dump(fmap: "", with_stats: false, dump_format: "text")
      out_len = ::FFI::MemoryPointer.new(:uint64) # bst_ulong is 64-bit
      out_result = ::FFI::MemoryPointer.new(:pointer)
      check_result FFI.XGBoosterDumpModelEx(handle_pointer, fmap, with_stats ? 1 : 0, dump_format, out_len, out_result)
      out_result.read_pointer.get_array_of_string(0, out_len.read_uint64)
    end

    # Writes the result of #dump to the file +fout+.
    #
    # For "json" the trees are wrapped in a bracketed, comma-separated list; for
    # any other format each tree is preceded by a "booster[i]:" header line.
    def dump_model(fout, fmap: "", with_stats: false, dump_format: "text")
      ret = dump(fmap: fmap, with_stats: with_stats, dump_format: dump_format)
      File.open(fout, "wb") do |f|
        if dump_format == "json"
          f.print("[\n", ret.join(",\n"), "\n]")
        else
          ret.each_with_index { |tree, i| f.print("booster[#{i}]:\n", tree) }
        end
      end
    end

    # Feature importance by split count; same as #score with "weight".
    def fscore(fmap: "")
      # always weight
      score(fmap: fmap, importance_type: "weight")
    end

    # Feature importance computed from the text dump of the model.
    #
    # @param fmap [String] feature map argument forwarded to #dump
    # @param importance_type [String, Symbol] "weight" counts the splits per
    #   feature; "gain"/"cover" average that statistic over a feature's splits;
    #   "total_gain"/"total_cover" sum it
    # @return [Hash{String => Integer, Float}] importance keyed by feature id
    # @raise [ArgumentError] if +importance_type+ is not one of the above
    def score(fmap: "", importance_type: "weight")
      importance_type = importance_type.to_s
      unless IMPORTANCE_TYPES.include?(importance_type)
        raise ArgumentError, "Unknown importance type: #{importance_type}"
      end

      if importance_type == "weight"
        counts = {}
        each_split(dump(fmap: fmap, with_stats: false)) do |fid, _stats|
          counts[fid] = counts.fetch(fid, 0) + 1
        end
        return counts
      end

      # "total_gain" / "total_cover" read the same statistic as "gain" /
      # "cover" but are summed instead of averaged.
      stat = importance_type.sub(/\Atotal_/, "")
      average_over_splits = stat == importance_type
      key = "#{stat}="

      counts = {}
      totals = {}
      each_split(dump(fmap: fmap, with_stats: true)) do |fid, stats|
        counts[fid] = counts.fetch(fid, 0) + 1
        totals[fid] = totals.fetch(fid, 0) + stats.split(key)[1].split(",")[0].to_f
      end

      totals.each_key { |fid| totals[fid] /= counts[fid] } if average_over_splits
      totals
    end

    private

    # Yields (feature id, trailing statistics text) for every line of the dumped
    # trees that contains a "[feature<threshold]" condition; other lines are skipped.
    def each_split(trees)
      trees.each do |tree|
        tree.split("\n").each do |line|
          parts = line.split("[")
          next if parts.size < 2 # no condition on this line (also covers blank lines)

          condition, stats = parts[1].split("]")
          yield condition.split("<")[0], stats
        end
      end
    end

    # NOTE(review): the body of this method is cut off in the reviewed excerpt;
    # reconstructed from how @handle is filled in #initialize - confirm against
    # the full file.
    def handle_pointer
      @handle.read_pointer
    end
  end
end