module Libmf
  class Model
    def initialize(**options)
      @options = options
    end

    def fit(data, eval_set: nil)
      train_set = create_problem(data)

      @model =
        if eval_set
          eval_set = create_problem(eval_set)
          FFI.mf_train_with_validation(train_set, eval_set, param)
        else
          FFI.mf_train(train_set, param)
        end

      raise Error, "fit failed" if @model.null?

      nil
    end

    def predict(row, column)
      FFI.mf_predict(model, row, column)
    end

    def cv(data, folds: 5)
      problem = create_problem(data)
      # TODO update fork to differentiate between bad parameters and zero error
      res = FFI.mf_cross_validation(problem, folds, param)
      raise Error, "cv failed" if res == 0
      res
    end

    def save_model(path)
      status = FFI.mf_save_model(model, path)
      raise Error, "Cannot save model" if status != 0
    end
    alias_method :save, :save_model

    def self.load(path)
      model = Model.new
      model.load_model(path)
      model
    end

    def load_model(path)
      @model = FFI.mf_load_model(path)
      raise Error, "Cannot open model" if @model.null?
    end

    def rows
      model[:m]
    end

    def columns
      model[:n]
    end

    def factors
      model[:k]
    end

    def bias
      model[:b]
    end

    def p_factors(format: nil)
      _factors(model[:p], rows, format)
    end

    def q_factors(format: nil)
      _factors(model[:q], columns, format)
    end

    def rmse(data)
      FFI.calc_rmse(create_problem(data), model)
    end

    def mae(data)
      FFI.calc_mae(create_problem(data), model)
    end

    def gkl(data)
      FFI.calc_gkl(create_problem(data), model)
    end

    def logloss(data)
      FFI.calc_logloss(create_problem(data), model)
    end

    def accuracy(data)
      FFI.calc_accuracy(create_problem(data), model)
    end

    def mpr(data, transpose)
      FFI.calc_mpr(create_problem(data), model, transpose)
    end

    def auc(data, transpose)
      FFI.calc_auc(create_problem(data), model, transpose)
    end

    private

    def _factors(ptr, n, format)
      case format
      when :numo
        Numo::SFloat.from_string(ptr.read_bytes(n * factors * 4)).reshape(n, factors)
      when nil
        ptr.read_array_of_float(n * factors).each_slice(factors).to_a
      else
        raise ArgumentError, "Invalid format"
      end
    end

    def model
      raise Error, "Not fit" unless @model
      @model
    end

    def param
      param = FFI.mf_get_default_param
      options = @options.dup

      if options[:loss].is_a?(Symbol)
        loss_map = {
          real_l2: 0,
          real_l1: 1,
          real_kl: 2,
          binary_log: 5,
          binary_l2: 6,
          binary_l1: 7,
          one_class_row: 10,
          one_class_col: 11,
          one_class_l2: 12
        }
        options[:loss] = loss_map[options[:loss]] || (raise ArgumentError, "Unknown loss")
      end

      # silence insufficient blocks warning with default params
      options[:bins] ||= 25 unless options[:nr_bins]
      options[:copy_data] = false unless options.key?(:copy_data)

      options_map = {
        loss: :fun,
        factors: :k,
        threads: :nr_threads,
        bins: :nr_bins,
        iterations: :nr_iters,
        learning_rate: :eta,
        nmf: :do_nmf
      }
      options.each do |k, v|
        k = options_map[k] if options_map[k]
        param[k] = v
      end

      # do_nmf must be true for generalized KL-divergence
      param[:do_nmf] = true if param[:fun] == 2

      param
    end

    def create_problem(data)
      if data.is_a?(String)
        # need to expand path so it's absolute
        return FFI.mf_read_problem(File.expand_path(data))
      end

      if data.is_a?(Matrix)
        data = data.data
      end

      raise Error, "No data" if data.empty?
      # TODO do in C for better performance
      # can use FIX2INT() and RFLOAT_VALUE() instead of pack
      # and write directly to C string
      buffer = String.new
      pack_format = "iif"
      data.each do |row|
        row.pack(pack_format, buffer: buffer)
      end

      r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
      r.write_bytes(buffer)
      # double check size is what we expect
      # FFI will throw an error above if too long
      raise Error, "Bad buffer size" if r.size != buffer.bytesize

      m = data.max_by { |row| row[0] }[0] + 1
      n = data.max_by { |row| row[1] }[1] + 1

      prob = FFI::Problem.new
      prob[:m] = m
      prob[:n] = n
      prob[:nnz] = data.size
      prob[:r] = r
      prob
    end
  end
end
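
# Usage sketch (illustrative, not part of the library source): one way this
# API might be exercised, assuming the LIBMF native library is available and
# `data` is an array of [row, column, value] triples as expected by
# create_problem. The sample values, option choices, and the "model.bin" path
# are hypothetical.
#
#   data = [[0, 0, 5.0], [0, 2, 3.5], [1, 1, 4.0], [2, 0, 1.0]]
#   model = Libmf::Model.new(factors: 8, iterations: 20)   # mapped to :k / :nr_iters
#   model.fit(data)
#   model.predict(0, 2)                  # predicted value for row 0, column 2
#   model.save("model.bin")
#   restored = Libmf::Model.load("model.bin")
#   restored.rmse(data)                  # training-set RMSE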