Sha256: 4b44f4e5296d99c01f28fc21c66bbb2f71c0e68c2a7aa27947cd0d2401eee270
Contents?: true
Size: 1.63 KB
Versions: 2
Compression:
Stored size: 1.63 KB
Contents
module Crags
  # Helpers for discovering Craigslist locations and categories and for running
  # keyword searches against per-location RSS feeds.
  #
  # Depends on +fetch_doc+, which is not defined in this module (presumably
  # supplied by Fetch -- confirm), and on +url_encode+ from ERB::Util. The
  # documents returned by +fetch_doc+ are expected to respond to +search+, and
  # their nodes to +at+, +[]+, +inner_html+ and +inner_text+.
  module Searcher
    include Fetch
    include ERB::Util

    # Removes the URL scheme and one trailing slash from +url+.
    #
    # Both "http://" and "https://" are stripped. The rest of the URL,
    # including interior slashes, query strings and a ".html" suffix, is left
    # untouched.
    #
    #   strip_http("http://sfbay.craigslist.org/")    # => "sfbay.craigslist.org"
    #   strip_http("https://host/sss/123.html")       # => "host/sss/123.html"
    #
    # @param url [String]
    # @return [String]
    def strip_http(url)
      url.sub(%r{https?://}, '').chomp('/')
    end

    # Fetches the geo listing page for an ISO country code.
    #
    # @param country [String] interpolated into the geo.craigslist.org URL
    # @return [Object] whatever +fetch_doc+ returns for that URL
    def location_doc(country)
      fetch_doc("http://geo.craigslist.org/iso/#{country}")
    end

    # Returns the anchor nodes matched by "#list a" on the country's geo page.
    #
    # @param country [String]
    def location_links(country)
      location_doc(country).search("#list a")
    end

    # Returns the href of every location link for +country+, with scheme and
    # trailing slash stripped via +strip_http+.
    #
    # @param country [String]
    # @return [Array<String>]
    def locations(country)
      location_links(country).map { |link| strip_http(link["href"]) }
    end

    # Builds a mapping of category link text to category href, read from the
    # "for sale" table on the sfbay front page.
    #
    # @return [Hash] inner_html of each link => its href
    def categories
      doc = fetch_doc("http://sfbay.craigslist.org/")
      links = doc.search("table[@summary=\"for sale\"] a")
      # Named by_name rather than categories so the local does not shadow
      # this method.
      by_name = {}
      links.each do |link|
        by_name[link.inner_html] = link["href"]
      end
      by_name
    end

    # Searches every location of +country+ for +keyword+ and returns all
    # results as one flat array.
    #
    # Sleeps 1 to 3 seconds before each location request (presumably to
    # throttle traffic to the remote site). A given block is forwarded to
    # +search_location+ and receives each item hash.
    #
    # @param keyword [String]
    # @param country [String] defaults to 'us'
    # @param category [String] defaults to 'sss'
    # @return [Array<Hash>]
    def search(keyword, country = 'us', category = 'sss', &block)
      locations(country).map do |loc|
        sleep(1 + rand(3))
        search_location(keyword, loc, category, &block)
      end.flatten
    end

    # Converts every "item" node in +doc+ into a hash via +hashify+.
    #
    # @return [Array<Hash>]
    def items(doc)
      doc.search("item").map { |item| hashify(item) }
    end

    # Extracts title, url and date from a single feed item node.
    #
    # The url is the node's "rdf:about" attribute passed through +strip_http+;
    # the date is the text of its "dc:date" child parsed with DateTime.parse.
    #
    # @return [Hash] with keys :title, :url and :date
    def hashify(item)
      title = item.at("title").inner_text
      url = strip_http(item["rdf:about"])
      date = DateTime.parse(item.at("dc:date").inner_text)
      { :title => title, :url => url, :date => date }
    end

    # Builds the search URL for one location; +keyword+ is URL-encoded.
    #
    # @param keyword [String]
    # @param loc [String] host (and optional path) without scheme
    # @param category [String] defaults to 'sss'
    # @return [String]
    def search_location_link(keyword, loc, category = 'sss')
      "http://#{loc}/search/#{category}?query=#{url_encode(keyword)}"
    end

    # Fetches the RSS form of the search URL for one location and returns its
    # items as hashes. Each item is yielded to the block, if one is given,
    # before being collected.
    #
    # @param keyword [String]
    # @param loc [String]
    # @param category [String] defaults to 'sss'
    # @return [Array<Hash>]
    def search_location(keyword, loc, category = 'sss', &block)
      doc = fetch_doc("#{search_location_link(keyword, loc, category)}&format=rss")
      items(doc).map do |item|
        yield item if block_given?
        item
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
gotascii-crags-1.4.2 | lib/crags/searcher.rb |
gotascii-crags-1.4.3 | lib/crags/searcher.rb |