Sha256: d1cdeb97a24a08ec88ff8345c3a0abca14a6dd16dced6aa5dad4603a2a15ad4e

Contents?: true

Size: 1.59 KB

Versions: 1

Compression:

Stored size: 1.59 KB

Contents

# -*- encoding: utf-8 -*-
#
# Copyright:: Copyright (c) kyow, 2011
# Authors:: K.Nishi

$:.unshift(File.dirname(__FILE__))
require 'dictionary'

module TfIdf
  #
  # Utility class: derives the IDF dictionary (idf.dic) from the DF
  # dictionary (df.dic).
  #
  class Utility
    # (Provisional) number of indexed documents; the N in IDF = log(N / DF).
    N = 20000000000

    # Generates idf.dic from df.dic and prints a short summary
    # (entry count and average IDF) to stdout.
    def self.create_dic
      dfs = df_load
      idfs = create_idf(dfs)
      idf_save(idfs)
      puts "complete."
      puts "size=#{idfs.size} average=#{idfs.average}"
    end

    # Converts a DF value into an IDF value.
    # IDF = log(N / DF)
    # df_value:: DF value (document frequency)
    # Returns 0 when df_value is not positive.
    def self.get_inverse(df_value)
      # Divide as Float: Integer division would truncate the ratio and would
      # yield log(0) = -Infinity whenever df_value exceeds N.
      df_value > 0 ? Math.log(N.to_f / df_value) : 0
    end

    # NOTE: a bare `private` does not apply to methods defined with
    # `def self.`; the methods below therefore remain publicly callable.
    # Their visibility is intentionally left unchanged for compatibility.
    private

    # Builds an IDFs object from a DFs object.
    # dfs:: DFs object; `dfs.all` is iterated with each_pair as term => DF
    #       (presumably a Hash -- confirm against dictionary.rb)
    # Returns the populated IDFs object with size and average set.
    def self.create_idf(dfs)
      idfs = IDFs.new
      entries = dfs.all
      total = 0.0
      entries.each_pair do |term, df|
        idf = get_inverse(df)
        idfs.set(term, idf)
        total += idf
      end
      idfs.size = entries.size
      # An empty dictionary has no meaningful average; avoid dividing by zero.
      idfs.average = entries.empty? ? 0.0 : total / entries.size
      idfs
    end

    # Loads df.dic and returns the unmarshalled object.
    # Raises RuntimeError when the dictionary file does not exist.
    def self.df_load
      df_dic = dic_path('df.dic')
      raise "df.dic not found: #{df_dic}" unless File.exist?(df_dic)

      # NOTE(review): Marshal.load must only be fed trusted data; this reads
      # the dictionary file located relative to this source file.
      File.open(df_dic, 'rb') { |f| Marshal.load(f) }
    end

    # Saves the given IDFs object to idf.dic.
    # idfs:: IDFs object to serialize with Marshal
    def self.idf_save(idfs)
      File.open(dic_path('idf.dic'), 'wb') { |f| Marshal.dump(idfs, f) }
    end

    # Returns the path of a dictionary file inside the per-Ruby-version
    # dictionary directory: <this dir>/../dic/<Version.ruby>/<name>.
    def self.dic_path(name)
      File.join(File.dirname(__FILE__), '..', 'dic', Version.ruby.to_s, name)
    end
    private_class_method :dic_path
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
tfidf_ja-0.2.1 lib/utility.rb