lib/sad_panda.rb in sad_panda-1.0.1 vs lib/sad_panda.rb in sad_panda-1.1.0
- old
+ new
@@ -1,175 +1,24 @@
-require_relative "./sad_panda/version"
-require_relative './sad_panda/emotions/emotion_bank.rb'
-require_relative './sad_panda/emotions/term_polarities.rb'
-require_relative './sad_panda/emotions/stopwords.rb'
+require 'sad_panda/bank/emotions'
+require 'sad_panda/bank/polarities'
+require 'sad_panda/bank/stopwords'
+require 'sad_panda/emotion'
+require 'sad_panda/polarity'
+require 'sad_panda/helpers'
require 'lingua/stemmer'
+# SadPanda main module
module SadPanda
-
# this method returns the best-fit emotion for the status message
- def self.emotion(message)
+ def self.emotion(text)
# get the emotion for which the emotion score value is highest
- SadPanda.get_emotion_score(message, EmotionBank.get_term_emotions, build_term_frequencies(message))
+ SadPanda::Emotion.new(text).call
end
# this method returns the polarity value for the status message
# (normalized by the number of 'polar' words that the status
# message contains)
- def self.polarity(message)
+ def self.polarity(text)
# get the polarity for which the polarity score value is highest
- SadPanda.get_polarity_score(message, TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
+ SadPanda::Polarity.new(text).call
end
-
-
- private
-
- # this method reads the text of the status message
- # inputed by the user, removes common english words,
- # strips punctuation and capitalized letters, isolates
- # the stem of the word, and ultimately produces a hash
- # where the keys are the stems of the remaining words,
- # and the values are their respective frequencies within
- # the status message
- def self.build_term_frequencies(message, term_frequencies = {})
- # clean the text of the status message
- happy_emoticon = happy_emoticon(message)
- sad_emoticon = sad_emoticon(message)
- words = words_from_message_text(message)
- #filter for english stopwords
- stopwords = Stopwords.stopwords
- words = words - stopwords
- #get word stems
- word_stems = SadPanda.get_word_stems words
- #create term_frequencies
- #return term frequency hash
- create_term_frequencies(word_stems, term_frequencies)
- end
-
- # this method takes an array of words an returns an array of word stems
- def self.get_word_stems(words, output=[])
- stemmer = Lingua::Stemmer.new(:language => "en")
- words.each do |word|
- output << stemmer.stem(word)
- end
- output
- end
-
- # this method takes an emotion-words hash and a hash containing word
- # frequencies for the status message, calculates a numerical score
- # for each possble emotion, and returns the emotion with the highest
- # "score"
- def self.get_emotion_score(message, emotions, term_frequencies, emotion_score = {})
- term_frequencies.each do |key,value|
- set_emotions(emotions, emotion_score, key, value)
- end
- # return an emotion_score_hash to be processed by emotion
- # get clue from any emoticons present
- check_emoticon_for_emotion(emotion_score, message)
- end
-
- # this method gives the status method a normalized polarity
- # value based on the words it contains
- def self.get_polarity_score (message, polarity_hash, term_frequencies, polarity_scores = [])
- term_frequencies.each do |key, value|
- set_polarities(key, value, polarity_hash, polarity_scores)
- end
-
- # return an polarity_score_hash to be processed by polarity method
- # return an emotion_score_hash to be processed by emotion
- # get clue from any emoticons present
- check_emoticon_for_polarity(polarity_scores, message)
- end
-
- def self.happy_emoticon(message)
- (message.include?(":)") || message.include?(":-)") || message.include?(":]") || message.include?(":-]"))
- end
-
- def self.sad_emoticon(message)
- (message.include?(":(") || message.include?(":-(") || message.include?(":[") || message.include?(":-["))
- end
-
- def self.words_from_message_text(message)
- message.gsub!(/[^a-z ]/i, '')
- message.downcase!
- message.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
- message.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
- message.gsub!(/\s\s+/,' ')
- message.split(" ")
- end
-
- def self.set_emotions(emotions, emotion_score, term, frequency)
- emotions.keys.each do |k|
- store_emotions(emotions, emotion_score, k, term, frequency)
- end
- end
-
- def self.set_polarities(term, frequency, polarity_hash, polarity_scores)
- polarity_hash.keys.each do |k|
- store_polarities(term, k, polarity_hash, polarity_scores)
- end
- end
-
- def self.store_emotions(emotions, emotion_score, emotion, term, frequency)
- if emotions[emotion].include?(term)
- emotion_score[emotion] ||= 0
- emotion_score[emotion] += frequency
- end
- end
-
- def self.store_polarities(term, word, polarity_hash, polarity_scores)
- if term == word
- polarity_scores << (polarity_hash[word].to_f)
- end
- end
-
- def self.check_emoticon_for_emotion(emotion_score, message)
- if (happy_emoticon(message) && sad_emoticon(message))
- "ambiguous"
- elsif happy_emoticon(message)
- "joy"
- elsif sad_emoticon(message)
- "sadness"
- else
- return_emotion_score(emotion_score)
- end
- end
-
- def self.return_emotion_score(emotion_score)
- ## 0 if unable to detect emotion
- if emotion_score == {}
- "ambiguous"
- else
- emotion_score.max_by{|k, v| v}[0]
- end
- end
-
- def self.check_emoticon_for_polarity(polarity_scores, message)
- if (happy_emoticon(message) && sad_emoticon(message))
- score = 5
- elsif happy_emoticon(message)
- score = 8
- elsif sad_emoticon(message)
- score = 2
- else
- return_polarity_scores(polarity_scores)
- end
- end
-
- def self.return_polarity_scores(polarity_scores)
- if polarity_scores == []
- # polarity unreadable; return a neutral score of 5
- 5
- else
- polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
- end
- end
-
- def self.create_term_frequencies(word_stems, term_frequencies)
- word_stems.each do |stem|
- term_frequencies[stem] = word_stems.count(stem)
- end
- term_frequencies
- end
-
-
end