Sha256: 4b44f4e5296d99c01f28fc21c66bbb2f71c0e68c2a7aa27947cd0d2401eee270

Contents?: true

Size: 1.63 KB

Versions: 2

Compression:

Stored size: 1.63 KB

Contents

module Crags
  module Searcher
    include Fetch
    include ERB::Util

    # Removes a leading "http://" or "https://" scheme from +url+, together
    # with a single trailing slash if one is present.
    #
    # Both parts are matched at the string boundaries only, so slashes inside
    # the path are preserved ("http://host/a/b" => "host/a/b") and a bare
    # "http://host" still loses its scheme. A string that does not start with
    # a scheme is returned unchanged.
    #
    # url - String URL.
    #
    # Returns a new String; +url+ itself is not modified.
    def strip_http(url)
      # Lazy capture + optional "/" + \z: only the final slash can be dropped.
      url.sub(%r{\Ahttps?://(.*?)/?\z}m, '\1')
    end

    # Fetches the geo.craigslist.org listing page for +country+.
    #
    # country - value appended to the "/iso/" path of the geo site.
    #
    # Returns whatever fetch_doc returns for that URL.
    def location_doc(country)
      geo_url = "http://geo.craigslist.org/iso/#{country}"
      fetch_doc(geo_url)
    end

    # Selects the anchors matching "#list a" from the location page of
    # +country+.
    #
    # country - passed straight through to location_doc.
    #
    # Returns the result of the document's #search call.
    def location_links(country)
      listing = location_doc(country)
      listing.search("#list a")
    end

    # Lists the location hosts for +country+ by running every location link's
    # "href" attribute through strip_http.
    #
    # country - passed straight through to location_links.
    #
    # Returns an Array with one stripped href per link, in link order.
    def locations(country)
      location_links(country).map do |anchor|
        strip_http(anchor["href"])
      end
    end

    # Builds a lookup of the "for sale" links found on the sfbay front page.
    #
    # Returns a Hash mapping each link's inner HTML to its "href" attribute.
    # When two links share the same inner HTML, the later one wins.
    def categories
      front_page = fetch_doc("http://sfbay.craigslist.org/")
      anchors = front_page.search("table[@summary=\"for sale\"] a")
      pairs = anchors.map { |anchor| [anchor.inner_html, anchor["href"]] }
      Hash[pairs]
    end

    # Runs a keyword search against every location of +country+ and merges
    # the results into one flat Array.
    #
    # keyword  - search term handed to search_location.
    # country  - handed to locations (default: 'us').
    # category - category path segment handed to search_location
    #            (default: 'sss').
    # block    - optional; forwarded to search_location for each location.
    #
    # Returns the flattened Array of everything search_location returned.
    def search(keyword, country = 'us', category = 'sss', &block)
      per_location = []
      locations(country).each do |loc|
        # Pause 1-3 seconds (1 + rand(3)) before every location request.
        sleep(1 + rand(3))
        per_location << search_location(keyword, loc, category, &block)
      end
      per_location.flatten
    end

    # Converts every "item" element found in +doc+ with hashify.
    #
    # doc - object responding to #search.
    #
    # Returns an Array with one hashify result per matched element.
    def items(doc)
      doc.search("item").map { |node| hashify(node) }
    end

    # Turns one feed item element into a plain Hash.
    #
    # item - element responding to #at and #[]; it is read for its "title"
    #        child, its "rdf:about" attribute and its "dc:date" child.
    #
    # Returns a Hash with :title (String), :url (the "rdf:about" value passed
    # through strip_http) and :date (DateTime parsed from the "dc:date" text).
    def hashify(item)
      {
        :title => item.at("title").inner_text,
        :url => strip_http(item["rdf:about"]),
        :date => DateTime.parse(item.at("dc:date").inner_text)
      }
    end

    # Builds the search URL for one location.
    #
    # keyword  - search term; URL-encoded with url_encode before use.
    # loc      - host part placed directly after "http://".
    # category - path segment following "/search/" (default: 'sss').
    #
    # Returns the URL as a String.
    def search_location_link(keyword, loc, category = 'sss')
      encoded_keyword = url_encode(keyword)
      "http://#{loc}/search/#{category}?query=#{encoded_keyword}"
    end

    # Searches a single location and returns its items.
    #
    # The feed URL is the search_location_link URL with "&format=rss"
    # appended; the fetched document is converted with items.
    #
    # keyword  - search term.
    # loc      - location host handed to search_location_link.
    # category - category path segment (default: 'sss').
    # block    - optional; when given, each item is yielded to it.
    #
    # Returns an Array of the items, whether or not a block was given.
    def search_location(keyword, loc, category = 'sss', &block)
      feed_url = "#{search_location_link(keyword, loc, category)}&format=rss"
      items(fetch_doc(feed_url)).map do |entry|
        yield entry if block_given?
        # The block's return value is ignored; the item itself is collected.
        entry
      end
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygem

Version Path
gotascii-crags-1.4.2 lib/crags/searcher.rb
gotascii-crags-1.4.3 lib/crags/searcher.rb