lib/epitome/corpus.rb in epitome-0.2.0 vs lib/epitome/corpus.rb in epitome-0.2.1

- old
+ new

@@ -1,7 +1,8 @@ require 'matrix' require 'stopwords' +require 'pry' module Epitome class Corpus attr_reader :original_corpus def initialize(document_collection, lang="en") @@ -93,10 +94,13 @@ end def idf(word) # Number of documents in which word appears # Inverse Frequency Smooth (as per Wikipedia article) - Math.log( @n_docs / n_docs_including_w(word) ) + result = Math.log( @n_docs / n_docs_including_w(word) ) + + # Return 1 to avoid words having all the same tf_idf by multiplying by 0 + return result == 0 ? 1.0 : result end def tf(sentence, word) # Number of occurrences of word in sentence sentence.scan(word).count