sad_panda.rb in sad_panda-1.0.1

- old
+ new
@@ -4,157 +4,172 @@
 require_relative './sad_panda/emotions/stopwords.rb'
 require 'lingua/stemmer'
 
 module SadPanda
 
-	# this method reads the text of the status message
-	# inputed by the user, removes common english words,
-	# strips punctuation and capitalized letters, isolates
-	# the stem of the word, and ultimately produces a hash
-	# where the keys are the stems of the remaining words,
-	# and the values are their respective frequencies within
-	# the status message
-	def self.build_term_frequencies message
+  # this method returns the best-fit emotion for the status message
+  def self.emotion(message)
+    # get the emotion for which the emotion score value is highest
+    SadPanda.get_emotion_score(message, EmotionBank.get_term_emotions, build_term_frequencies(message))
+  end
 
-		@message = message
+  # this method returns the polarity value for the status message
+  # (normalized by the number of 'polar' words that the status
+  # message contains)
+  def self.polarity(message)
+    # get the polarity for which the polarity score value is highest
+    SadPanda.get_polarity_score(message, TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
+  end
 
-		# create empty term_frequencies
-		term_frequencies = {}
 
-		# clean the text of the status message
-		if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]"))
-			@happy_que = true
-		end
-		if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-["))
-			@sad_que = true
-		end
+  private
 
-		message_text = @message.gsub(/[^a-z ]/i, '').downcase
-    message_text.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
-    message_text.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
-    message_text.gsub!(/\s\s+/,' ')
-		words = message_text.split(" ")
+  	# this method reads the text of the status message
+  	# inputed by the user, removes common english words,
+  	# strips punctuation and capitalized letters, isolates
+  	# the stem of the word, and ultimately produces a hash
+  	# where the keys are the stems of the remaining words,
+  	# and the values are their respective frequencies within
+  	# the status message
+  	def self.build_term_frequencies(message, term_frequencies = {})
+  		# clean the text of the status message
+      happy_emoticon = happy_emoticon(message)
+      sad_emoticon = sad_emoticon(message)
+  		words = words_from_message_text(message)
+  		#filter for english stopwords
+  		stopwords = Stopwords.stopwords
+  		words = words - stopwords
+  		#get word stems
+  		word_stems = SadPanda.get_word_stems words
+  		#create term_frequencies
+  		#return term frequency hash
+    	create_term_frequencies(word_stems, term_frequencies)
+    end
 
-		#filter for english stopwords
-		stopwords = Stopwords.stopwords
-		words = words - stopwords
+  	# this method takes an array of words an returns an array of word stems
+  	def self.get_word_stems(words, output=[])
+  		stemmer = Lingua::Stemmer.new(:language => "en")
+  		words.each do |word|
+  			output << stemmer.stem(word)
+  		end
+  		output
+  	end
 
-		#get word stems
-		word_stems = SadPanda.get_word_stems words
+  	# this method takes an emotion-words hash and a hash containing word
+  	# frequencies for the status message, calculates a numerical score
+  	# for each possble emotion, and returns the emotion with the highest
+  	# "score"
+  	def self.get_emotion_score(message, emotions, term_frequencies, emotion_score = {})
+  		term_frequencies.each do |key,value|
+  			set_emotions(emotions, emotion_score, key, value)
+  		end
+  		# return an emotion_score_hash to be processed by emotion
+      # get clue from any emoticons present
+      check_emoticon_for_emotion(emotion_score, message)
+  	end
 
-		#create term_frequencies
-		word_stems.each do |stem|
-			term_frequencies[stem] = word_stems.count(stem)
-		end
+  	# this method gives the status method a normalized polarity
+  	# value based on the words it contains
+  	def self.get_polarity_score (message, polarity_hash, term_frequencies, polarity_scores = [])
+  		term_frequencies.each do |key, value|
+        set_polarities(key, value, polarity_hash, polarity_scores)
+  		end
 
-		#return term frequency matrix
-		term_frequencies
-	end
+  		# return an polarity_score_hash to be processed by polarity method
+  		# return an emotion_score_hash to be processed by emotion
+      # get clue from any emoticons present
+      check_emoticon_for_polarity(polarity_scores, message)
+  	end
 
-	# this method takes an array of words an returns an array of word stems
-	def self.get_word_stems words
-		@stemmer = Lingua::Stemmer.new(:language => "en")
-		output = []
-		words.each do |word|
-			output << @stemmer.stem(word)
-		end
-		output
-	end
+    def self.happy_emoticon(message)
+      (message.include?(":)") || message.include?(":-)") || message.include?(":]") || message.include?(":-]"))
+    end
 
-	# this method takes an emotion-words hash and a hash containing word
-	# frequencies for the status message, calculates a numerical score
-	# for each possble emotion, and returns the emotion with the highest
-	# "score"
-	def self.get_emotion_score(emotions, term_frequencies, verbose = false)
-		emotion_score = {}
-		term_frequencies.each do |key,value|
-			emotions.keys.each do |k|
-				if emotions[k].include?(key)
-					emotion_score[k] ||= 0
-					emotion_score[k] += value
-				end
-			end
-		end
-		if @verbose
-			emotion_score.keys.each do |key|
-				puts "EMOTION: "+key
-				puts "SCORE: "+emotion_score[key].to_s
-			end
-		end
-		# return an emotion_score_hash to be processed by emotion
-    # get clue from any emoticons present
-    if (@happy_que && @sad_que)
-        return "ambiguous"
-    elsif @happy_que
-        return "joy"
-    elsif @sad_que
-        return "sadness"
-    else
-		## 0 if unable to detect emotion
-      if emotion_score == {}
-          return "ambiguous"
-      else
-          score = emotion_score.max_by{|k, v| v}[0]
+    def self.sad_emoticon(message)
+      (message.include?(":(") || message.include?(":-(") || message.include?(":[") || message.include?(":-["))
+    end
+
+    def self.words_from_message_text(message)
+      message.gsub!(/[^a-z ]/i, '')
+      message.downcase!
+      message.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
+      message.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
+      message.gsub!(/\s\s+/,' ')
+      message.split(" ")
+    end
+
+    def self.set_emotions(emotions, emotion_score, term, frequency)
+      emotions.keys.each do |k|
+        store_emotions(emotions, emotion_score, k, term, frequency)
       end
-      score
     end
-	end
 
-	# this method returns the best-fit emotion for the status message
-	def self.emotion message
-    # get the emotion for which the emotion score value is highest
-    if @emotions
-        SadPanda.get_emotion_score(@emotions, SadPanda.build_term_frequencies(message))
-    else
-        SadPanda.get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies(message))
+    def self.set_polarities(term, frequency, polarity_hash, polarity_scores)
+      polarity_hash.keys.each do |k|
+        store_polarities(term, k, polarity_hash, polarity_scores)
+      end
     end
-	end
 
-	# this method gives the status method a normalized polarity
-	# value based on the words it contains
-	def self.get_polarity_score (polarity_hash, term_frequencies, verbose = false)
-		polarity_scores = []
-		term_frequencies.each do |key, value|
-			polarity_hash.keys.each do |k|
-				if key == k
-					polarity_scores << (polarity_hash[k].to_f)
-				end
-			end
-		end
+    def self.store_emotions(emotions, emotion_score, emotion, term, frequency)
+      if emotions[emotion].include?(term)
+        emotion_score[emotion] ||= 0
+        emotion_score[emotion] += frequency
+      end
+    end
 
-		# return an polarity_score_hash to be processed by polarity method
-		# return an emotion_score_hash to be processed by emotion
-    # get clue from any emoticons present
-    if (@happy_que && @sad_que)
+    def self.store_polarities(term, word, polarity_hash, polarity_scores)
+      if term == word
+        polarity_scores << (polarity_hash[word].to_f)
+      end
+    end
+
+    def self.check_emoticon_for_emotion(emotion_score, message)
+      if (happy_emoticon(message) && sad_emoticon(message))
+         "ambiguous"
+      elsif happy_emoticon(message)
+         "joy"
+      elsif sad_emoticon(message)
+         "sadness"
+      else
+        return_emotion_score(emotion_score)
+      end
+    end
+
+    def self.return_emotion_score(emotion_score)
+      ## 0 if unable to detect emotion
+      if emotion_score == {}
+        "ambiguous"
+      else
+        emotion_score.max_by{|k, v| v}[0]
+      end
+    end
+
+    def self.check_emoticon_for_polarity(polarity_scores, message)
+      if (happy_emoticon(message) && sad_emoticon(message))
         score = 5
-    elsif @happy_que
+      elsif happy_emoticon(message)
         score = 8
-    elsif @sad_que
+      elsif sad_emoticon(message)
         score = 2
-    else
-			if polarity_scores == []
-				# polarity unreadable; return a neutral score of zero
-				score = 5
-			else
-				score = polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
-				polarity_scores = []
-			end
-			if @verbose
-				puts "POLARITY: " + score.to_s
-			end
-			score
-		end
-	end
+      else
+        return_polarity_scores(polarity_scores)
+      end
+    end
 
-	# this method returns the polarity value for the status message
-	# (normalized by the number of 'polar' words that the status
-	# message contains)
-	def self.polarity message
-		# get the polarity for which the polarity score value is highest
-		if @polarities
-			SadPanda.get_polarity_score(@polarities, SadPanda.build_term_frequencies(message))
-		else
-			SadPanda.get_polarity_score(TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
-		end
-	end
+    def self.return_polarity_scores(polarity_scores)
+      if polarity_scores == []
+        # polarity unreadable; return a neutral score of 5
+        5
+      else
+        polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
+      end
+    end
+
+    def self.create_term_frequencies(word_stems, term_frequencies)
+      word_stems.each do |stem|
+        term_frequencies[stem] = word_stems.count(stem)
+      end
+      term_frequencies
+    end
+
 
 end