sad_panda.rb in sad_panda-0.1.6

- old
+ new
@@ -1,9 +1,168 @@
 require_relative "./sad_panda/version"
 require_relative "./sad_panda/status_message"
 require_relative './sad_panda/emotions/emotion_bank.rb'
 require_relative './sad_panda/emotions/term_polarities.rb'
 require_relative './sad_panda/emotions/stopwords.rb'
+require 'lingua/stemmer'
 
-
 # Naive sentiment analysis for short status messages.
 #
 # The module-level methods tokenize a message, stem the remaining words and
 # look the stems up in an emotion bank / polarity table. Emoticons found in the
 # raw message take precedence over the word-based result.
 module SadPanda
   # Emoticons that mark a message as happy, sad or uneasy.
   HAPPY_EMOTICONS  = [":)", ":-)", ":]", ":-]"].freeze
   SAD_EMOTICONS    = [":(", ":-(", ":[", ":-["].freeze
   # NOTE(review): ":/" also occurs inside "http://", so a message containing
   # such a URL is flagged as uneasy. This matches the previous behavior.
   UNEASY_EMOTICONS = [":/", ":-/", ":\\", ":-\\"].freeze

   # These define *instance* accessors for classes that include SadPanda; the
   # module-level methods below work on the module's own instance variables.
   attr_accessor :message, :verbose
   attr_reader :stemmer

   # Builds a stem => frequency hash for a status message.
   #
   # The message is scanned for emoticons (recorded in module state for the
   # scoring methods), stripped of everything except letters and spaces,
   # downcased, split into words, filtered against the English stopword list
   # and finally stemmed.
   #
   # @param message [String, nil] raw status message; nil is treated as empty
   # @return [Hash] word stem => number of occurrences in the message
   def self.build_term_frequencies(message)
     @message = message
     text = message.to_s

     # Assign the flags outright (rather than only ever setting them to true)
     # so that emoticons from a previous message cannot leak into this one.
     @happy_que  = HAPPY_EMOTICONS.any?  { |emoticon| text.include?(emoticon) }
     @sad_que    = SAD_EMOTICONS.any?    { |emoticon| text.include?(emoticon) }
     @uneasy_que = UNEASY_EMOTICONS.any? { |emoticon| text.include?(emoticon) }

     # Turn tabs/newlines into spaces *before* dropping non-letters; otherwise
     # words separated only by a newline would be glued together.
     words = text.gsub(/\s+/, ' ').gsub(/[^a-z ]/i, '').downcase.split(' ')

     # filter for english stopwords
     words -= Stopwords.stopwords

     # Count each stem in a single pass, keeping first-occurrence order.
     SadPanda.get_word_stems(words).each_with_object({}) do |stem, frequencies|
       frequencies[stem] = (frequencies[stem] || 0) + 1
     end
   end

   # Takes an array of words and returns an array of their stems.
   #
   # @param words [Array<String>] lowercase words
   # @return [Array<String>] one stem per input word, in the same order
   def self.get_word_stems(words)
     # The stemmer is reused across calls instead of being rebuilt every time.
     @stemmer ||= Lingua::Stemmer.new(:language => "en")
     words.map { |word| @stemmer.stem(word) }
   end

   # Picks the best-fit emotion for a message.
   #
   # Emoticons recorded by the last build_term_frequencies call win over the
   # word-based score. Otherwise each emotion is scored by summing the
   # frequencies of the message terms it contains, and the emotion with the
   # highest score is returned.
   #
   # @param emotions [Hash] emotion name => collection of stems (must respond
   #   to include?)
   # @param term_frequencies [Hash] stem => frequency
   # @param verbose [Boolean] print the per-emotion scores when true
   # @return [String, Object] "uncertain", "uneasiness", "joy" or "sadness"
   #   when decided by emoticons or lack of data; otherwise the winning key of
   #   +emotions+
   def self.get_emotion_score(emotions, term_frequencies, verbose = false)
     emotion_score = {}
     term_frequencies.each do |term, frequency|
       emotions.each do |emotion, terms|
         next unless terms.include?(term)

         emotion_score[emotion] = (emotion_score[emotion] || 0) + frequency
       end
     end

     # Honor the parameter as well as the module-level setting.
     if verbose || @verbose
       emotion_score.each do |emotion, score|
         puts "EMOTION: #{emotion}"
         puts "SCORE: #{score}"
       end
     end

     # get clue from any emoticons present
     return "uncertain"  if @happy_que && @sad_que
     return "uneasiness" if @uneasy_que
     return "joy"        if @happy_que
     return "sadness"    if @sad_que

     # unable to detect any emotion from the words either
     return "uncertain" if emotion_score.empty?

     emotion_score.max_by { |_emotion, score| score }.first
   end

   # Returns the best-fit emotion for the status message.
   #
   # Uses @emotions when it is set (nothing in this file assigns it) and falls
   # back to EmotionBank.get_term_emotions otherwise.
   #
   # @param message [String] raw status message
   # @return [String, Object] see get_emotion_score
   def self.emotion(message)
     emotion_bank = @emotions || EmotionBank.get_term_emotions
     SadPanda.get_emotion_score(emotion_bank, SadPanda.build_term_frequencies(message))
   end

   # Gives a message a polarity value based on the words it contains.
   #
   # Emoticons recorded by the last build_term_frequencies call win: 5 for
   # mixed happy/sad, 3 for uneasy, 8 for happy, 2 for sad. Otherwise the
   # result is the mean polarity of the distinct terms found in
   # +polarity_hash+ (term frequencies are not used as weights), or the
   # neutral value 5 when no term is found.
   #
   # @param polarity_hash [Hash] stem => polarity (converted with to_f)
   # @param term_frequencies [Hash] stem => frequency
   # @param verbose [Boolean] print the resulting polarity when true
   # @return [Numeric] polarity score
   def self.get_polarity_score(polarity_hash, term_frequencies, verbose = false)
     polarity_scores = []
     term_frequencies.each_key do |term|
       polarity_scores << polarity_hash[term].to_f if polarity_hash.key?(term)
     end

     # get clue from any emoticons present
     score =
       if @happy_que && @sad_que
         5
       elsif @uneasy_que
         3
       elsif @happy_que
         8
       elsif @sad_que
         2
       elsif polarity_scores.empty?
         # polarity unreadable; fall back to the neutral score of 5
         5
       else
         polarity_scores.inject(0.0) { |sum, polarity| sum + polarity } / polarity_scores.length
       end

     puts "POLARITY: #{score}" if verbose || @verbose
     score
   end

   # Returns the polarity value for the status message (normalized by the
   # number of 'polar' words that the status message contains).
   #
   # Uses @polarities when it is set (nothing in this file assigns it) and
   # falls back to TermPolarities.get_term_polarities otherwise.
   #
   # @param message [String] raw status message
   # @return [Numeric] see get_polarity_score
   def self.polarity(message)
     polarities = @polarities || TermPolarities.get_term_polarities
     SadPanda.get_polarity_score(polarities, SadPanda.build_term_frequencies(message))
   end
 end