%w( rubygems cgi json open-uri ostruct ).each {|lib| require lib }

# Builds Twitter Search API URLs from a set of search options, performs the
# searches, and can poll a search in a loop, handing every tweet it finds to
# a block.
class TwitterSearchWatcher

  TWITTER_SEARCH_URL = 'http://search.twitter.com/search.json'.freeze

  DEFAULT_USER_AGENT = 'TwitterSearchWatcher RubyGem http://github.com/devfu/twitter-search-watcher'.freeze

  # The attributes that are serialized into the query string, in output order
  QUERY_STRING_ATTRIBUTES = [ :q, :to, :from, :since_id, :page, :max_id, :rpp ].freeze

  # The User-Agent header value to send along with all Twitter Search API requests
  # (the reader is defined below and falls back to DEFAULT_USER_AGENT)
  attr_writer :user_agent

  # A string you want to search twitter for
  attr_accessor :q

  # The username of someone you want to search replies to
  attr_accessor :to

  # The username of someone you want to search replies from
  attr_accessor :from

  # Get a particular page of Twitter search results (pagination).
  # Typically used in conjunction with :max_id
  attr_accessor :page

  # Used for pagination, so you can get page=3 where the max_id of the first page was 1234
  attr_accessor :max_id

  # Only get tweets with IDs greater than this ID (useful for only getting new tweets)
  attr_accessor :since_id

  # Number of results per page (max 100); the writer is defined below
  attr_reader :rpp

  # The number of seconds to wait between Twitter calls. Default: 60 (seconds)
  # (the reader is defined below and supplies the default)
  attr_writer :check_every

  # The maximum number of pages to check for tweets on each poll,
  # counting the first page of results.
  #
  # If nil, we'll check until there are no more pages (when :next_page isn't present)
  attr_accessor :max_pages

  # Sets the number of results per page.
  #
  # Passing nil clears the value so that :rpp is left out of the query string.
  # Raises a RuntimeError if the value is greater than 100.
  def rpp=(value)
    raise "The maximum rpp (Results per Page) value is 100" if value && value > 100
    @rpp = value
  end

  # The number of seconds to wait between polls (60 unless set explicitly)
  def check_every
    @check_every || 60
  end

  # Create a new TwitterSearchWatcher
  #
  #   TwitterSearchWatcher.new 'string to search'
  #   TwitterSearchWatcher.new 'string to search', :check_every => 60
  #   TwitterSearchWatcher.new :to => 'barackobama', :from => 'SenJohnMcCain'
  #
  # Every key in the options Hash is assigned through the matching writer
  # method, so an unknown key raises NoMethodError.
  def initialize(search_string = nil, options = nil)
    if search_string.is_a? Hash
      # a Hash in the first position *is* the options
      options = search_string
    else
      self.q = search_string
    end

    options.each {|k, v| send "#{k}=", v } if options
  end

  # Returns the URL we'll use to call the Twitter Search API.
  #
  # Without parameters, it'll generate a URL just from this TwitterSearchWatcher instance.
  #
  # With parameters, it'll override the TwitterSearchWatcher instance's options with
  # whatever you pass, eg.
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url
  #   => "http://search.twitter.com/search.json?q=foo&rpp=15"
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url( :rpp => 99 )
  #   => "http://search.twitter.com/search.json?q=foo&rpp=99"
  #
  def search_url(additional_parameters = nil)
    TWITTER_SEARCH_URL + build_query_string(additional_parameters)
  end

  # The User-Agent header value that is sent (DEFAULT_USER_AGENT unless set explicitly)
  def user_agent
    @user_agent || DEFAULT_USER_AGENT
  end

  # Performs a search and returns the parsed JSON response.
  # Accepts the same parameters as #search_url
  def search!(additional_parameters = nil)
    JSON.parse fetch_body(search_url(additional_parameters))
  end

  # Performs a search, given the response from another search.
  #
  # If a response is given, the search will only return tweets newer than the given response's tweets.
  # If a response is not given, this performs a normal search.
  #
  # Accepts additional parameters (same as #search_url)
  def search_newer!(response = nil, additional_parameters = nil)
    return search!(additional_parameters) unless response

    search!( (additional_parameters || {}).merge( :since_id => response['max_id'] ) )
  end

  # Performs a search, given the response from another search.
  #
  # If the response given is paginated (ie. there are additional tweets available on additional pages),
  # this will return the next page.  Else, this will return nil.
  #
  # Accepts additional parameters (same as #search_url)
  def search_more!(response, additional_parameters = nil)
    return nil unless response['next_page']

    search!( (additional_parameters || {}).merge( :page => (response['page'] + 1), :max_id => response['max_id'] ) )
  end

  # Instantiates a new TwitterSearchWatcher given the search_string and options and then
  # calls #watch! on the instance using the block given.
  def self.watch!(search_string, options = nil, &block)
    new(search_string, options).watch!(&block)
  end

  # Starts watching this search in a loop.
  # It will wait #check_every seconds between new requests (except requests to get additional pages).
  # Every time a new tweet is found, that tweet is passed to the block given.
  #
  #   TwitterSearchWatcher.new('foo').watch! {|tweet| puts "got tweet: #{ tweet.text }" }
  #
  # Installs an INT signal handler that prints a message and exits the process.
  def watch!(additional_parameters = nil, &block)
    @max_id_found_so_far = 0

    trap('INT'){ puts "\nexiting ..."; exit }
    puts "Watching for tweets: #{ search_url(additional_parameters) }"

    loop do
      @last_response = search_newer!(@last_response, additional_parameters)
      call_tweet_callbacks(@last_response, block)
      update_max_id @last_response

      check_additional_pages(@last_response, additional_parameters, block)

      sleep check_every
    end
  end

  private

  # Downloads the body of the given URL, sending our User-Agent header.
  def fetch_body(url)
    headers = { 'User-Agent' => user_agent }

    if URI.respond_to?(:open)
      # Kernel#open stopped handling URLs in Ruby 3.0; URI.open is the supported entry point.
      # The block form makes sure the underlying IO gets closed.
      URI.open(url, headers) {|io| io.read }
    else
      # older Rubies, where open-uri only patches Kernel#open
      open(url, headers) {|io| io.read }
    end
  end

  # Follows the :next_page links of a first-page response, passing the tweets of
  # every additional page to the block.
  #
  # Stops when there are no more pages or when #max_pages pages (counting the
  # first one) have been checked.  The limit is tested *before* requesting a
  # page, so no request is made for a page that would not be used.
  def check_additional_pages(response, additional_parameters, block)
    pages_checked = 1 # the first page has already been processed by the caller

    while response['next_page'] && (max_pages.nil? || pages_checked < max_pages)
      response = search_more!(response, additional_parameters)
      break unless response

      pages_checked += 1
      call_tweet_callbacks(response, block)
      update_max_id response
    end
  end

  # Remembers the highest max_id seen in any response (responses without a max_id are ignored).
  def update_max_id(response)
    max_id = response['max_id']
    @max_id_found_so_far = max_id if max_id && max_id > @max_id_found_so_far
  end

  # Wraps each tweet of the response in an OpenStruct and passes it to the block.
  #
  # The tweet's 'id' is exposed as 'tweet_id'.  This works on a copy of each
  # tweet, so the response itself is left untouched.
  def call_tweet_callbacks(response, block)
    (response['results'] || []).each do |tweet|
      attributes = tweet.dup
      attributes['tweet_id'] = attributes.delete 'id'
      block.call OpenStruct.new(attributes)
    end
  end

  # URL-escapes a value for the query string, but keeps double quotes literal
  # (CGI.escape already turns spaces into '+').
  def escape(string)
    CGI.escape(string.to_s).gsub('%22', '"')
  end

  # Builds the query string ('?' included) from this instance's attributes.
  #
  # If additional parameters are passed, they override the watcher's own values;
  # passing nil for an attribute removes it from the query string.  Only the
  # (Symbol) keys listed in QUERY_STRING_ATTRIBUTES are looked at.
  def build_query_string(additional_parameters = nil)
    parameter_values = {}

    QUERY_STRING_ATTRIBUTES.each do |attr|
      value = send(attr)
      parameter_values[attr] = value if value
    end

    if additional_parameters
      QUERY_STRING_ATTRIBUTES.each do |attr|
        parameter_values[attr] = additional_parameters[attr] if additional_parameters.key?(attr)
      end
    end

    pairs = parameter_values.map {|k, v| "#{ k }=#{ escape(v) }" if v }.compact
    '?' + pairs.join('&')
  end

end