lib/sad_panda.rb in sad_panda-1.0.0 vs lib/sad_panda.rb in sad_panda-1.0.1
- old
+ new
@@ -4,157 +4,172 @@
require_relative './sad_panda/emotions/stopwords.rb'
require 'lingua/stemmer'
module SadPanda
- # this method reads the text of the status message
- # inputed by the user, removes common english words,
- # strips punctuation and capitalized letters, isolates
- # the stem of the word, and ultimately produces a hash
- # where the keys are the stems of the remaining words,
- # and the values are their respective frequencies within
- # the status message
- def self.build_term_frequencies message
# Returns the best-fit emotion for the status message.
#
# Delegates to get_emotion_score, handing it the message, the term/emotion
# table from EmotionBank and the term frequencies built from the message.
def self.emotion(message)
  term_emotions = EmotionBank.get_term_emotions
  frequencies = build_term_frequencies(message)
  SadPanda.get_emotion_score(message, term_emotions, frequencies)
end
- @message = message
# Returns the polarity value for the status message.
#
# Delegates to get_polarity_score, handing it the message, the term/polarity
# table from TermPolarities and the term frequencies built from the message.
def self.polarity(message)
  term_polarities = TermPolarities.get_term_polarities
  frequencies = SadPanda.build_term_frequencies(message)
  SadPanda.get_polarity_score(message, term_polarities, frequencies)
end
- # create empty term_frequencies
- term_frequencies = {}
- # clean the text of the status message
- if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]"))
- @happy_que = true
- end
- if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-["))
- @sad_que = true
- end
+ private
- message_text = @message.gsub(/[^a-z ]/i, '').downcase
- message_text.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
- message_text.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
- message_text.gsub!(/\s\s+/,' ')
- words = message_text.split(" ")
# Builds a stem => frequency hash for a status message.
#
# The message is tokenized by words_from_message_text, the English
# stopwords from Stopwords.stopwords are removed, the remaining words are
# stemmed, and the stems are counted by create_term_frequencies.
#
# message          - the status message text entered by the user
# term_frequencies - hash that receives the counts (a new hash by default)
#
# Returns whatever create_term_frequencies returns for the stems.
def self.build_term_frequencies(message, term_frequencies = {})
  # Emoticons are not looked at here; the scoring methods check them
  # against the message themselves.
  words = words_from_message_text(message) - Stopwords.stopwords
  word_stems = SadPanda.get_word_stems(words)
  create_term_frequencies(word_stems, term_frequencies)
end
- #filter for english stopwords
- stopwords = Stopwords.stopwords
- words = words - stopwords
# Takes an array of words and returns an array of word stems.
#
# Each word is stemmed with an English Lingua::Stemmer and appended to
# output, which is then returned.
def self.get_word_stems(words, output = [])
  english_stemmer = Lingua::Stemmer.new(:language => "en")
  words.each_with_object(output) { |word, stems| stems << english_stemmer.stem(word) }
end
- #get word stems
- word_stems = SadPanda.get_word_stems words
# Takes an emotion-words hash and a hash of word frequencies for the
# status message, accumulates a numerical score for each possible emotion
# (via set_emotions) and returns the result of
# check_emoticon_for_emotion, which takes emoticons in the message into
# account before falling back to the accumulated scores.
def self.get_emotion_score(message, emotions, term_frequencies, emotion_score = {})
  term_frequencies.each { |term, frequency| set_emotions(emotions, emotion_score, term, frequency) }
  check_emoticon_for_emotion(emotion_score, message)
end
- #create term_frequencies
- word_stems.each do |stem|
- term_frequencies[stem] = word_stems.count(stem)
- end
# Gives the status message a polarity value based on the words it
# contains.
#
# Every term in term_frequencies is handed to set_polarities, which
# collects values into polarity_scores; the final value is then produced
# by check_emoticon_for_polarity, which takes emoticons in the message
# into account.
def self.get_polarity_score(message, polarity_hash, term_frequencies, polarity_scores = [])
  term_frequencies.each { |term, frequency| set_polarities(term, frequency, polarity_hash, polarity_scores) }
  check_emoticon_for_polarity(polarity_scores, message)
end
- # this method takes an array of words an returns an array of word stems
- def self.get_word_stems words
- @stemmer = Lingua::Stemmer.new(:language => "en")
- output = []
- words.each do |word|
- output << @stemmer.stem(word)
- end
- output
- end
# Returns true when the message contains one of the happy emoticons
# ":)", ":-)", ":]" or ":-]", false otherwise.
def self.happy_emoticon(message)
  [":)", ":-)", ":]", ":-]"].any? { |emoticon| message.include?(emoticon) }
end
- # this method takes an emotion-words hash and a hash containing word
- # frequencies for the status message, calculates a numerical score
- # for each possble emotion, and returns the emotion with the highest
- # "score"
- def self.get_emotion_score(emotions, term_frequencies, verbose = false)
- emotion_score = {}
- term_frequencies.each do |key,value|
- emotions.keys.each do |k|
- if emotions[k].include?(key)
- emotion_score[k] ||= 0
- emotion_score[k] += value
- end
- end
- end
- if @verbose
- emotion_score.keys.each do |key|
- puts "EMOTION: "+key
- puts "SCORE: "+emotion_score[key].to_s
- end
- end
- # return an emotion_score_hash to be processed by emotion
- # get clue from any emoticons present
- if (@happy_que && @sad_que)
- return "ambiguous"
- elsif @happy_que
- return "joy"
- elsif @sad_que
- return "sadness"
- else
- ## 0 if unable to detect emotion
- if emotion_score == {}
- return "ambiguous"
- else
- score = emotion_score.max_by{|k, v| v}[0]
# Returns true when the message contains one of the sad emoticons
# ":(", ":-(", ":[" or ":-[", false otherwise.
def self.sad_emoticon(message)
  [":(", ":-(", ":[", ":-["].any? { |emoticon| message.include?(emoticon) }
end
+
+ def self.words_from_message_text(message)
+ message.gsub!(/[^a-z ]/i, '')
+ message.downcase!
+ message.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
+ message.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
+ message.gsub!(/\s\s+/,' ')
+ message.split(" ")
+ end
+
# Offers one term and its frequency to every emotion listed in the
# emotions hash; store_emotions decides whether the emotion's score in
# emotion_score is increased.
def self.set_emotions(emotions, emotion_score, term, frequency)
  emotions.keys.each { |emotion| store_emotions(emotions, emotion_score, emotion, term, frequency) }
end
- end
- # this method returns the best-fit emotion for the status message
- def self.emotion message
- # get the emotion for which the emotion score value is highest
- if @emotions
- SadPanda.get_emotion_score(@emotions, SadPanda.build_term_frequencies(message))
- else
- SadPanda.get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies(message))
# Compares one term against every word listed in polarity_hash;
# store_polarities appends the matching word's polarity to
# polarity_scores. The frequency argument is accepted but not used.
def self.set_polarities(term, frequency, polarity_hash, polarity_scores)
  polarity_hash.keys.each { |word| store_polarities(term, word, polarity_hash, polarity_scores) }
end
- end
- # this method gives the status method a normalized polarity
- # value based on the words it contains
- def self.get_polarity_score (polarity_hash, term_frequencies, verbose = false)
- polarity_scores = []
- term_frequencies.each do |key, value|
- polarity_hash.keys.each do |k|
- if key == k
- polarity_scores << (polarity_hash[k].to_f)
- end
- end
- end
# Adds frequency to the running score of emotion when the emotion's word
# list (emotions[emotion]) includes term; the score starts at 0 the first
# time an emotion is hit.
#
# Returns the updated score, or nil when the term does not belong to the
# emotion.
def self.store_emotions(emotions, emotion_score, emotion, term, frequency)
  return unless emotions[emotion].include?(term)

  emotion_score[emotion] = (emotion_score[emotion] || 0) + frequency
end
- # return an polarity_score_hash to be processed by polarity method
- # return an emotion_score_hash to be processed by emotion
- # get clue from any emoticons present
- if (@happy_que && @sad_que)
# Appends the polarity of word (polarity_hash[word] converted with to_f)
# to polarity_scores when term equals word.
#
# Returns polarity_scores after the append, or nil when term and word
# differ.
def self.store_polarities(term, word, polarity_hash, polarity_scores)
  polarity_scores << polarity_hash[word].to_f if term == word
end
+
# Picks the emotion for a message, letting emoticons override the word
# scores: "ambiguous" when both happy and sad emoticons are present,
# "joy" for happy only, "sadness" for sad only. Without emoticons the
# answer comes from return_emotion_score(emotion_score).
def self.check_emoticon_for_emotion(emotion_score, message)
  happy = happy_emoticon(message)
  sad = sad_emoticon(message)

  return "ambiguous" if happy && sad
  return "joy" if happy
  return "sadness" if sad

  return_emotion_score(emotion_score)
end
+
# Returns the emotion with the highest score, or "ambiguous" when no
# emotion was scored (emotion_score is an empty hash).
def self.return_emotion_score(emotion_score)
  return "ambiguous" if emotion_score == {}

  top_emotion, _top_score = emotion_score.max_by { |_emotion, score| score }
  top_emotion
end
+
# Picks the polarity for a message, letting emoticons override the word
# scores: 5 when both happy and sad emoticons are present, 8 for happy
# only, 2 for sad only. Without emoticons the answer comes from
# return_polarity_scores(polarity_scores).
def self.check_emoticon_for_polarity(polarity_scores, message)
  happy = happy_emoticon(message)
  sad = sad_emoticon(message)

  return 5 if happy && sad
  return 8 if happy
  return 2 if sad

  return_polarity_scores(polarity_scores)
end
- # this method returns the polarity value for the status message
- # (normalized by the number of 'polar' words that the status
- # message contains)
- def self.polarity message
- # get the polarity for which the polarity score value is highest
- if @polarities
- SadPanda.get_polarity_score(@polarities, SadPanda.build_term_frequencies(message))
- else
- SadPanda.get_polarity_score(TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
- end
- end
# Returns the arithmetic mean of polarity_scores as a float, or the
# neutral score 5 when no polarity values were collected.
def self.return_polarity_scores(polarity_scores)
  return 5 if polarity_scores == []

  total = polarity_scores.inject(0.0) { |sum, el| sum + el }
  total / polarity_scores.length
end
+
# Records how often each stem occurs in word_stems.
#
# word_stems       - array of word stems (duplicates expected)
# term_frequencies - hash that receives stem => count; an existing entry
#                    for a stem is overwritten, other entries are kept
#
# Returns term_frequencies.
def self.create_term_frequencies(word_stems, term_frequencies)
  # Count in a single pass instead of rescanning word_stems with
  # Array#count for every element.
  counts = Hash.new(0)
  word_stems.each { |stem| counts[stem] += 1 }
  counts.each { |stem, occurrences| term_frequencies[stem] = occurrences }
  term_frequencies
end
+
end