lib/sad_panda.rb in sad_panda-0.1.5 vs lib/sad_panda.rb in sad_panda-0.1.6

- old
+ new

@@ -1,9 +1,168 @@ require_relative "./sad_panda/version" require_relative "./sad_panda/status_message" require_relative './sad_panda/emotions/emotion_bank.rb' require_relative './sad_panda/emotions/term_polarities.rb' require_relative './sad_panda/emotions/stopwords.rb' +require 'lingua/stemmer' - module SadPanda + + attr_accessor :message, :verbose + attr_reader :stemmer + + # this method reads the text of the status message + # inputed by the user, removes common english words, + # strips punctuation and capitalized letters, isolates + # the stem of the word, and ultimately produces a hash + # where the keys are the stems of the remaining words, + # and the values are their respective frequencies within + # the status message + def self.build_term_frequencies message + + @message = message + + # create empty term_frequencies + term_frequencies = {} + + # clean the text of the status message + if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]")) + @happy_que = true + end + if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-[")) + @sad_que = true + end + if (@message.include?(":/") || @message.include?(":-/") || @message.include?(":\\") || @message.include?(":-\\")) + @uneasy_que = true + end + message_text = @message.gsub(/[^a-z ]/i, '').downcase + message_text = message_text.gsub(/\s\s+/,' ') + words = message_text.split(" ") + + #filter for english stopwords + stopwords = Stopwords.stopwords + words = words - stopwords + + #get word stems + word_stems = SadPanda.get_word_stems words + + #create term_frequencies + word_stems.each do |stem| + term_frequencies[stem] = word_stems.count(stem) + end + + #return term frequency matrix + term_frequencies + end + + # this method takes an array of words an returns an array of word stems + def self.get_word_stems words + @stemmer = Lingua::Stemmer.new(:language => "en") + output = [] + words.each do |word| + output << @stemmer.stem(word) + end + output + end + + # this method takes an emotion-words hash and a hash containing word + # frequencies for the status message, calculates a numerical score + # for each possble emotion, and returns the emotion with the highest + # "score" + def self.get_emotion_score(emotions, term_frequencies, verbose = false) + emotion_score = {} + term_frequencies.each do |key,value| + emotions.keys.each do |k| + if emotions[k].include?(key) + emotion_score[k] ||= 0 + emotion_score[k] += value + end + end + end + if @verbose + emotion_score.keys.each do |key| + puts "EMOTION: "+key + puts "SCORE: "+emotion_score[key].to_s + end + end + # return an emotion_score_hash to be processed by emotion + # get clue from any emoticons present + if (@happy_que && @sad_que) + return "uncertain" + elsif @uneasy_que + return "uneasiness" + elsif @happy_que + return "joy" + elsif @sad_que + return "sadness" + else + ## 0 if unable to detect emotion + if emotion_score == {} + return "uncertain" + else + score = emotion_score.max_by{|k, v| v}[0] + end + score + end + end + + # this method returns the best-fit emotion for the status message + def self.emotion message + # get the emotion for which the emotion score value is highest + if @emotions + SadPanda.get_emotion_score(@emotions, SadPanda.build_term_frequencies(message)) + else + SadPanda.get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies(message)) + end + end + + # this method gives the status method a normalized polarity + # value based on the words it contains + def self.get_polarity_score (polarity_hash, term_frequencies, verbose = false) + polarity_scores = [] + term_frequencies.each do |key, value| + polarity_hash.keys.each do |k| + if key == k + polarity_scores << (polarity_hash[k].to_f) + end + end + end + + # return an polarity_score_hash to be processed by polarity method + # return an emotion_score_hash to be processed by emotion + # get clue from any emoticons present + if (@happy_que && @sad_que) + score = 5 + elsif @uneasy_que + score = 3 + elsif @happy_que + score = 8 + elsif @sad_que + score = 2 + else + if polarity_scores == [] + # polarity unreadable; return a neutral score of zero + score = 5 + else + score = polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length + polarity_scores = [] + end + if @verbose + puts "POLARITY: " + score.to_s + end + score + end + end + + # this method returns the polarity value for the status message + # (normalized by the number of 'polar' words that the status + # message contains) + def self.polarity message + # get the polarity for which the polarity score value is highest + if @polarities + SadPanda.get_polarity_score(@polarities, SadPanda.build_term_frequencies(message)) + else + SadPanda.get_polarity_score(TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message)) + end + end + end