Sha256: ec415f234c390493090252d7a3339c1c4ec62dbcc8745c6a87bf51c66d6762ec
Contents?: true
Size: 893 Bytes
Versions: 5
Compression:
Stored size: 893 Bytes
Contents
# A document-term matrix model that weights terms with the tf*idf function.
#
# @see http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html
module TfIdfSimilarity
  class TfIdfModel < Model
    # Computes the inverse document frequency of a term as
    # 1 + log(N / (df + 1)), where N is `documents.size` and df is the value
    # reported by `@model.document_count(term)`.
    #
    # @param [String] term a term
    # @return [Float] the term's inverse document frequency
    def inverse_document_frequency(term)
      matching_documents = @model.document_count(term)
      # The 1.0 literal keeps the division in floating point.
      ratio = documents.size / (matching_documents + 1.0)
      1 + log(ratio)
    end
    alias_method :idf, :inverse_document_frequency

    # Computes the frequency of a term within a document as the square root
    # of the count reported by `document.term_count(term)`.
    #
    # @param [Document] document a document
    # @param [String] term a term
    # @return [Float] the term's frequency in the document
    def term_frequency(document, term)
      sqrt(document.term_count(term))
    end
    alias_method :tf, :term_frequency
  end
end
Version data entries
5 entries across 5 versions & 1 rubygems