Sha256: eb42547a6e2b25bb84f09a18022aefaaf6279838076ac12ccd8fcf59341c56dc

Contents?: true

Size: 1.87 KB

Versions: 1

Compression:

Stored size: 1.87 KB

Contents

module Crags
  module Searcher
    include Fetch
    include ERB::Util

    def strip_http(url)
      url.gsub(/http\:\/\/(.*)(\/|(.html))/,'\1\3')
    end

    def location_link(country)
      "http://geo.craigslist.org/iso/#{country}"
    end

    def location_doc(country)
      fetch_doc(location_link(country))
    end

    def location_request(country)
      fetch_request(location_link(country))
    end

    def location_links(country)
      location_doc(country).search("#list a")
    end

    def locations(country)
      linkz = location_links(country)
      linkz = [location_request(country).last_effective_url] if linkz.empty?
      linkz.collect{|link| strip_http(link["href"]) }
    end

    def categories
      doc = fetch_doc("http://sfbay.craigslist.org/")
      links = doc.search("table[@summary=\"for sale\"] a")
      categories = {}
      links.each do |link|
        categories[link.inner_html] = link["href"]
      end
      categories
    end

    def search(keyword, country = 'us', category = 'sss', &block)
      locations(country).collect do |loc|
        sleep(1 + rand(3))
        search_location(keyword, loc, category, &block)
      end.flatten
    end

    def items(doc)
      doc.search("item").collect do |item|
        hashify(item)
      end
    end

    def hashify(item)
      title = item.at("title").inner_text
      url = strip_http(item["rdf:about"])
      date = DateTime.parse(item.at("dc:date").inner_text)
      {:title => title, :url => url, :date => date}
    end

    def search_location_link(keyword, loc, category = 'sss')
      "http://#{loc}/search/#{category}?query=#{url_encode(keyword)}"
    end

    def search_location(keyword, loc, category = 'sss', &block)
      doc = fetch_doc("#{search_location_link(keyword, loc, category)}&format=rss")
      items(doc).collect do |item|
        yield item if block_given?
        item
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
gotascii-crags-1.4.5 lib/crags/searcher.rb