lib/libmf/model.rb in libmf-0.1.3 vs lib/libmf/model.rb in libmf-0.2.0

- old
+ new

@@ -49,30 +49,42 @@ def bias model[:b] end - def p_factors - reshape(model[:p].read_array_of_float(factors * rows), factors) + def p_factors(format: nil) + _factors(model[:p], rows, format) end - def q_factors - reshape(model[:q].read_array_of_float(factors * columns), factors) + def q_factors(format: nil) + _factors(model[:q], columns, format) end private + def _factors(ptr, n, format) + case format + when :numo + Numo::SFloat.from_string(ptr.read_bytes(n * factors * 4)).reshape(n, factors) + when nil + ptr.read_array_of_float(n * factors).each_slice(factors).to_a + else + raise ArgumentError, "Invalid format" + end + end + def model raise Error, "Not fit" unless @model @model end def param param = FFI.mf_get_default_param options = @options.dup # silence insufficient blocks warning with default params options[:bins] ||= 25 unless options[:nr_bins] + options[:copy_data] = false unless options.key?(:copy_data) options_map = { :loss => :fun, :factors => :k, :threads => :nr_threads, :bins => :nr_bins, @@ -88,33 +100,39 @@ param[:do_nmf] = true if param[:fun] == 2 param end def create_problem(data) + if data.is_a?(String) + # need to expand path so it's absolute + return FFI.mf_read_problem(File.expand_path(data)) + end + raise Error, "No data" if data.empty? - nodes = [] - r = ::FFI::MemoryPointer.new(FFI::Node, data.size) - data.each_with_index do |row, i| - n = FFI::Node.new(r[i]) - n[:u] = row[0] - n[:v] = row[1] - n[:r] = row[2] - nodes << n + # TODO do in C for better performance + # can use FIX2INT() and RFLOAT_VALUE() instead of pack + buffer = String.new + data.each do |row| + row[0, 2].pack("i*".freeze, buffer: buffer) + row[2, 1].pack("f".freeze, buffer: buffer) end - m = nodes.map { |n| n[:u] }.max + 1 - n = nodes.map { |n| n[:v] }.max + 1 + r = ::FFI::MemoryPointer.new(FFI::Node, data.size) + r.write_bytes(buffer) + # double check size is what we expect + # FFI will throw an error above if too long + raise Error, "Bad buffer size" if r.size != buffer.bytesize + + m = data.max_by { |r| r[0] }[0] + 1 + n = data.max_by { |r| r[1] }[1] + 1 + prob = FFI::Problem.new prob[:m] = m prob[:n] = n - prob[:nnz] = nodes.size + prob[:nnz] = data.size prob[:r] = r prob - end - - def reshape(arr, factors) - arr.each_slice(factors).to_a end end end