lib/sad_panda.rb in sad_panda-1.0.1 vs lib/sad_panda.rb in sad_panda-1.1.0

- old
+ new

@@ -1,175 +1,24 @@ -require_relative "./sad_panda/version" -require_relative './sad_panda/emotions/emotion_bank.rb' -require_relative './sad_panda/emotions/term_polarities.rb' -require_relative './sad_panda/emotions/stopwords.rb' +require 'sad_panda/bank/emotions' +require 'sad_panda/bank/polarities' +require 'sad_panda/bank/stopwords' +require 'sad_panda/emotion' +require 'sad_panda/polarity' +require 'sad_panda/helpers' require 'lingua/stemmer' +# SadPanda main module module SadPanda - # this method returns the best-fit emotion for the status message - def self.emotion(message) + def self.emotion(text) # get the emotion for which the emotion score value is highest - SadPanda.get_emotion_score(message, EmotionBank.get_term_emotions, build_term_frequencies(message)) + SadPanda::Emotion.new(text).call end # this method returns the polarity value for the status message # (normalized by the number of 'polar' words that the status # message contains) - def self.polarity(message) + def self.polarity(text) # get the polarity for which the polarity score value is highest - SadPanda.get_polarity_score(message, TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message)) + SadPanda::Polarity.new(text).call end - - - private - - # this method reads the text of the status message - # inputed by the user, removes common english words, - # strips punctuation and capitalized letters, isolates - # the stem of the word, and ultimately produces a hash - # where the keys are the stems of the remaining words, - # and the values are their respective frequencies within - # the status message - def self.build_term_frequencies(message, term_frequencies = {}) - # clean the text of the status message - happy_emoticon = happy_emoticon(message) - sad_emoticon = sad_emoticon(message) - words = words_from_message_text(message) - #filter for english stopwords - stopwords = Stopwords.stopwords - words = words - stopwords - #get word stems - word_stems = SadPanda.get_word_stems words - #create term_frequencies - #return term frequency hash - create_term_frequencies(word_stems, term_frequencies) - end - - # this method takes an array of words an returns an array of word stems - def self.get_word_stems(words, output=[]) - stemmer = Lingua::Stemmer.new(:language => "en") - words.each do |word| - output << stemmer.stem(word) - end - output - end - - # this method takes an emotion-words hash and a hash containing word - # frequencies for the status message, calculates a numerical score - # for each possble emotion, and returns the emotion with the highest - # "score" - def self.get_emotion_score(message, emotions, term_frequencies, emotion_score = {}) - term_frequencies.each do |key,value| - set_emotions(emotions, emotion_score, key, value) - end - # return an emotion_score_hash to be processed by emotion - # get clue from any emoticons present - check_emoticon_for_emotion(emotion_score, message) - end - - # this method gives the status method a normalized polarity - # value based on the words it contains - def self.get_polarity_score (message, polarity_hash, term_frequencies, polarity_scores = []) - term_frequencies.each do |key, value| - set_polarities(key, value, polarity_hash, polarity_scores) - end - - # return an polarity_score_hash to be processed by polarity method - # return an emotion_score_hash to be processed by emotion - # get clue from any emoticons present - check_emoticon_for_polarity(polarity_scores, message) - end - - def self.happy_emoticon(message) - (message.include?(":)") || message.include?(":-)") || message.include?(":]") || message.include?(":-]")) - end - - def self.sad_emoticon(message) - (message.include?(":(") || message.include?(":-(") || message.include?(":[") || message.include?(":-[")) - end - - def self.words_from_message_text(message) - message.gsub!(/[^a-z ]/i, '') - message.downcase! - message.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '') - message.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '') - message.gsub!(/\s\s+/,' ') - message.split(" ") - end - - def self.set_emotions(emotions, emotion_score, term, frequency) - emotions.keys.each do |k| - store_emotions(emotions, emotion_score, k, term, frequency) - end - end - - def self.set_polarities(term, frequency, polarity_hash, polarity_scores) - polarity_hash.keys.each do |k| - store_polarities(term, k, polarity_hash, polarity_scores) - end - end - - def self.store_emotions(emotions, emotion_score, emotion, term, frequency) - if emotions[emotion].include?(term) - emotion_score[emotion] ||= 0 - emotion_score[emotion] += frequency - end - end - - def self.store_polarities(term, word, polarity_hash, polarity_scores) - if term == word - polarity_scores << (polarity_hash[word].to_f) - end - end - - def self.check_emoticon_for_emotion(emotion_score, message) - if (happy_emoticon(message) && sad_emoticon(message)) - "ambiguous" - elsif happy_emoticon(message) - "joy" - elsif sad_emoticon(message) - "sadness" - else - return_emotion_score(emotion_score) - end - end - - def self.return_emotion_score(emotion_score) - ## 0 if unable to detect emotion - if emotion_score == {} - "ambiguous" - else - emotion_score.max_by{|k, v| v}[0] - end - end - - def self.check_emoticon_for_polarity(polarity_scores, message) - if (happy_emoticon(message) && sad_emoticon(message)) - score = 5 - elsif happy_emoticon(message) - score = 8 - elsif sad_emoticon(message) - score = 2 - else - return_polarity_scores(polarity_scores) - end - end - - def self.return_polarity_scores(polarity_scores) - if polarity_scores == [] - # polarity unreadable; return a neutral score of 5 - 5 - else - polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length - end - end - - def self.create_term_frequencies(word_stems, term_frequencies) - word_stems.each do |stem| - term_frequencies[stem] = word_stems.count(stem) - end - term_frequencies - end - - end