require 'erb'
module Craigler
  # Wrapper object for a craigslist search. Fetched results are cached on the
  # instance, see #results.
  class Search
    include ERB::Util

    attr_reader :search_term, :categories, :locations, :page_limit

    # Creates a wrapper object for a craigslist search
    #
    # === Options
    # [:in]
    #   Specifies the location(s) to search in. Defaults to :anywhere.
    #   A single value or an Array of values; Strings are converted to Symbols.
    # [:only]
    #   Specifies the category or categories to search in. Defaults to :all_for_sale_or_wanted
    # [:page_limit]
    #   Maximum number of pages to fetch results from. Defaults to 4.
    #   Note: A location may, and often does, have more than one searchable
    #   url associated with it, e.g., {California}[http://geo.craigslist.org/iso/us/ca]. Because
    #   :page_limit is applied separately to each url within the location, searching :in => :california
    #   with a :page_limit => 4 could potentially make up to 100 page requests.
    #
    # === Raises
    # [InvalidSearchTerm] if +search_term+ is nil or an empty string.
    # [InvalidLocation]   if a location is not a key of LOCATIONS, or :anywhere is given inside an Array.
    # [InvalidCategory]   if a category is not a key of CATEGORIES.
    def initialize(search_term, options = {})
      raise InvalidSearchTerm if search_term.nil? || search_term == ''

      options = { :in => :anywhere, :only => :all_for_sale_or_wanted, :page_limit => 4 }.merge(options)

      # Compare after symbolizing so that 'anywhere' (String) is treated the
      # same as :anywhere, like every other location name. Arrays do not
      # respond to #to_sym, so :anywhere inside an Array is still rejected below.
      requested = options[:in]
      requested = LOCATIONS.keys if requested.respond_to?(:to_sym) && requested.to_sym == :anywhere

      @locations   = _symbol_list(requested)
      @categories  = _symbol_list(options[:only])
      @page_limit  = options[:page_limit]
      @search_term = search_term
      @results     = nil

      _validate_locations
      _validate_categories
    end

    # Returns the results of the search as an Array of Hashes with the keys
    # :url, :title, :location and :published_at. If this is the first time
    # calling #results then they will be fetched over the internet and cached
    # in the search object.
    #
    # The cache is only replaced once every page has been fetched, so an error
    # raised while fetching never leaves a partial result list behind.
    #
    # === Options
    # [:refresh]
    #   Set to true to force an update across the internet.
    def results(options = {})
      options = { :refresh => false }.merge(options)
      return @results unless @results.nil? || options[:refresh]

      fetched   = []
      last_page = @page_limit - 1 # pages start at 0

      _for_each_locations_search_url do |location, url|
        (0..last_page).each do |page|
          # The s parameter is an item offset; use the same page size here and
          # in the stop condition below.
          items = _extract_items_from_url(location, "#{url}&s=#{page * RESULTS_PER_PAGE}")
          fetched.concat(items)
          break if items.size < RESULTS_PER_PAGE # short page => no further pages
        end
      end

      @results = fetched
    end

    protected

    # Wraps a single value in an Array (Arrays are used as is) and converts
    # every element to a Symbol.
    def _symbol_list(value)
      (value.is_a?(Array) ? value : [value]).collect(&:to_sym)
    end

    # Raises InvalidLocation unless every requested location is a key of LOCATIONS.
    def _validate_locations
      @locations.each do |location|
        raise InvalidLocation.new(":anywhere not expected as part of an array") if location == :anywhere
        raise InvalidLocation.new(":#{location} is not a valid location") unless LOCATIONS.key?(location)
      end
    end

    # Raises InvalidCategory unless every requested category is a key of CATEGORIES.
    def _validate_categories
      @categories.each do |category|
        raise InvalidCategory unless CATEGORIES.key?(category)
      end
    end

    # Yields (location, search_url) once for every combination of requested
    # location, base url listed for that location in LOCATIONS, and requested
    # category. The search term is url-encoded into the query string.
    def _for_each_locations_search_url
      @locations.each do |location|
        LOCATIONS[location].each do |url|
          @categories.each do |category|
            yield(location, "#{url}search/#{CATEGORIES[category]}?query=#{url_encode(@search_term)}&format=rss")
          end
        end
      end
    end

    # Fetches +url+, parses it with Hpricot and returns one Hash per <item>
    # element with the keys :url, :title, :location and :published_at.
    def _extract_items_from_url(location, url)
      (Hpricot(_open_url(url)) / 'item').collect do |item|
        {
          :url          => item['rdf:about'],
          :title        => (item % 'title').inner_text,
          :location     => location.to_s,
          :published_at => Time.parse((item % 'dc:date').inner_text)
        }
      end
    end

    # Opens +url+ for reading. Kernel#open stopped handling URLs in Ruby 3.0,
    # so prefer open-uri's URI.open when it is available and fall back to the
    # original Kernel#open call on older setups.
    def _open_url(url)
      if defined?(URI) && URI.respond_to?(:open)
        URI.open(url)
      else
        open(url)
      end
    end
  end
end