require 'erb'
module Craigler
  class Search
    include ERB::Util

    attr_reader :search_term, :categories, :locations

    # Creates a wrapper object for a craigslist search
    #
    # === Options
    # [:in]
    #   Specifies the location(s) to search in. Defaults to all known
    #   locations; <tt>:anywhere</tt> is accepted as an equivalent alias.
    # [:only]
    #   Specifies the category or categories to search in. Defaults to :all_for_sale_or_wanted
    #
    # Raises InvalidSearchTerm if +search_term+ is nil or empty.
    def initialize(search_term, options = {})
      raise InvalidSearchTerm if search_term.nil? || search_term == ''
      @search_term = search_term
      @results = nil
      _parse_options(options)
    end

    # Returns the results of the search. If this is the first time
    # calling #results then they will be fetched over the internet and cached in the search object.
    #
    # === Options
    # [:page_limit]
    #   Maximum number of pages to fetch results from. Defaults to 4.
    #   Note: A location may, and often does, have more than one searchable
    #   url associated with it, e.g., {California}[http://geo.craigslist.org/iso/us/ca]. Because
    #   :page_limit is applied separately to each url within the location, searching :in => :california
    #   with a :page_limit => 4 could potentially make up to 100 page requests.
    # [:refresh]
    #   Set to true to force an update across the internet.
    def results(options = {})
      options = { :page_limit => 4, :refresh => false }.merge(options)
      return @results unless @results.nil? || options[:refresh]

      @results = []
      last_page = options[:page_limit] - 1 # pages start at 0
      _for_each_locations_search_url do |location, url|
        (0..last_page).each do |page|
          # Craigslist paginates via an &s= offset of RESULTS_PER_PAGE items
          # per page (was a hard-coded 25, now kept consistent with the
          # short-page check below).
          page_results = _extract_items_from_url(location, "#{url}&s=#{page * RESULTS_PER_PAGE}")
          @results.push(*page_results)
          # A short page means this url has no further pages.
          break if page_results.size < RESULTS_PER_PAGE
        end
      end
      # BUG FIX: this previously ended with the bare name +results+, which is
      # not the block-local variable above (block locals don't escape their
      # block) but an accidental recursive call to this method. Return the
      # collected array explicitly.
      @results
    end

    private

    # Normalizes the :in/:only options into @locations/@categories arrays and
    # validates every entry. Raises InvalidLocation or InvalidCategory on an
    # unknown entry.
    def _parse_options(options)
      options = { :in => LOCATIONS.keys, :only => :all_for_sale_or_wanted }.merge(options)
      @locations  = options[:in].is_a?(Array)   ? options[:in]   : [options[:in]]
      @categories = options[:only].is_a?(Array) ? options[:only] : [options[:only]]

      @locations.each do |location|
        raise InvalidLocation unless location == :anywhere || LOCATIONS.key?(location)
      end
      @categories.each do |category|
        raise InvalidCategory unless category == :all_for_sale_or_wanted || CATEGORIES.key?(category)
      end

      # BUG FIX: :anywhere passed validation but, unless it is itself a
      # LOCATIONS key, LOCATIONS[:anywhere] is nil and
      # _for_each_locations_search_url would crash with NoMethodError.
      # Expand it to every known location in that case.
      @locations = LOCATIONS.keys if @locations.include?(:anywhere) && !LOCATIONS.key?(:anywhere)
    end

    # Yields (location, search_url) once for every location url / category
    # combination, with the search term url-encoded into the RSS query string.
    def _for_each_locations_search_url
      @locations.each do |location|
        LOCATIONS[location].each do |url|
          @categories.each do |category|
            yield(location, "#{url}search/#{CATEGORIES[category]}?query=#{url_encode(@search_term)}&format=rss")
          end
        end
      end
    end

    # Fetches +url+ (an RSS feed) with open-uri, parses it with Hpricot, and
    # maps each <item> element to a result hash.
    def _extract_items_from_url(location, url)
      (Hpricot(open(url))/'item').collect { |item| {
        :url          => item['rdf:about'],
        :title        => (item%'title').inner_text,
        :location     => location.to_s,
        :published_at => Time.parse((item%'dc:date').inner_text)
      }}
    end
  end
end