module Twitter
  # A module for including Tweet parsing in a class. This module provides
  # functions for the extraction and processing of usernames, lists, URLs and
  # hashtags.
  #
  # Every pattern used here is looked up in Twitter::Regex, which is not
  # defined in this section of the source.
  #
  # All extractors coerce their argument with +to_s+, so any truthy object
  # (for example a Symbol) is accepted, while +nil+ and +false+ are treated
  # as "no text".
  module Extractor
    # Extracts a list of all usernames mentioned in the Tweet text. If the
    # text is nil or contains no username mentions an empty array
    # will be returned.
    #
    # A candidate is skipped when the text captured right after it matches
    # Twitter::Regex[:at_signs].
    #
    # If a block is given then it will be called for each username.
    def extract_mentioned_screen_names(text) # :yields: username
      return [] unless text

      possible_screen_names = []
      # to_s keeps non-String input from raising, consistent with extract_urls.
      text.to_s.scan(Twitter::Regex[:extract_mentions]) do |_before, sn, after|
        possible_screen_names << sn unless after =~ Twitter::Regex[:at_signs]
      end
      possible_screen_names.each { |sn| yield sn } if block_given?
      possible_screen_names
    end

    # Extracts the username replied to in the Tweet text. If the
    # text is nil or is not a reply nil will be returned.
    #
    # The returned value is the first capture group of
    # Twitter::Regex[:extract_reply].
    #
    # If a block is given then it will be called with the username replied to (if any)
    def extract_reply_screen_name(text) # :yields: username
      return nil unless text

      # String#match returns nil when the pattern does not match.
      reply_match = text.to_s.match(Twitter::Regex[:extract_reply])
      return nil unless reply_match

      screen_name = reply_match.captures.first
      yield screen_name if block_given?
      screen_name
    end

    # Extracts a list of all URLs included in the Tweet text. If the
    # text is nil or contains no URLs an empty array
    # will be returned.
    #
    # A URL whose captured protocol is exactly "www." is returned with
    # "http://" prepended; every other URL is returned as captured.
    #
    # If a block is given then it will be called for each URL.
    def extract_urls(text) # :yields: url
      return [] unless text

      urls = []
      text.to_s.scan(Twitter::Regex[:valid_url]) do |_all, _before, url, protocol, _domain, _path, _query|
        urls << (protocol == "www." ? "http://#{url}" : url)
      end
      urls.each { |url| yield url } if block_given?
      urls
    end

    # Extracts a list of all hashtags included in the Tweet text. If the
    # text is nil or contains no hashtags an empty array
    # will be returned. The array returned will not include the leading #
    # character.
    #
    # If a block is given then it will be called for each hashtag.
    def extract_hashtags(text) # :yields: hashtag_text
      return [] unless text

      tags = []
      # to_s keeps non-String input from raising, consistent with extract_urls.
      text.to_s.scan(Twitter::Regex[:auto_link_hashtags]) do |_before, _hash, hash_text|
        tags << hash_text
      end
      tags.each { |tag| yield tag } if block_given?
      tags
    end
  end
end