Sha256: 8cc3d4c0a34b5a046d9920754502cb1ec554cddb9a4e0f44d9e4655a6989ebdb

Contents?: true

Size: 1.13 KB

Versions: 1

Compression:

Stored size: 1.13 KB

Contents

require 'uri'
require 'net/http'

module GovKit
  module SearchEngines
    # Scrapes Google News search result pages into GovKit::Citation objects.
    #
    # This file only requires 'uri' and 'net/http'; Hpricot, Iconv,
    # String#html_safe! and GovKit::Citation must already be loaded by the
    # host application.
    class GoogleNews
      # Runs a Google News search for the given terms.
      #
      # @param options [Array<String>, String] search terms; a single string is
      #   treated as one term. Terms are joined with "+" in the q= parameter.
      # @return [Array<GovKit::Citation>] one citation per
      #   "div.search-results > div.story" element in the response (the
      #   request asks for num=50 results).
      def self.search(options=[])
        # Escape every term on its own so reserved characters such as "&" or
        # "#" inside a term cannot terminate or extend the q= parameter.
        query = Array(options).map { |term| URI.encode_www_form_component(term.to_s) }.join('+')
        host = "news.google.com"
        path = "/news?hl=en&ned=us&q=#{query}&btnG=Search+News&num=50"

        html = make_request(host, path)
        # The body is transcoded from gb2312 to UTF-8, dropping bytes that
        # cannot be converted (//IGNORE).
        # NOTE(review): gb2312 as the source charset looks surprising for an
        # hl=en request -- confirm against real responses.
        doc = Hpricot(Iconv.conv('utf-8//IGNORE', 'gb2312', html))

        doc.search("div.search-results > div.story").map { |story| build_citation(story) }
      end

      # Performs a plain HTTP GET and returns the response body.
      #
      # @param host [String] host name, without scheme
      # @param path [String] request path including the query string
      # @return [String] the body returned by Net::HTTP.get
      def self.make_request(host, path)
        # Trace the request only when Ruby runs in debug mode, so the library
        # does not write to the caller's stdout on every search.
        $stderr.puts "#{host}#{path}" if $DEBUG
        Net::HTTP.get(host, path)
      end

      # Builds one citation from a story element. Attributes whose markup is
      # missing from the story are left unset instead of raising.
      def self.build_citation(story)
        citation = GovKit::Citation.new

        link = story.at("h2.title a")
        if link
          citation.title = link.inner_text.html_safe!
          citation.url = link.attributes["href"]
        end

        citation.date = inner_html_at(story, "div.sub-title > span.date")
        citation.source = inner_html_at(story, "div.sub-title > span.source")
        citation.excerpt = inner_html_at(story, "div.body > div.snippet")

        citation
      end

      # Returns the html_safe! inner HTML of the first node matching
      # +selector+ inside +story+, or nil when nothing matches.
      def self.inner_html_at(story, selector)
        node = story.at(selector)
        node && node.inner_html.html_safe!
      end

      private_class_method :build_citation, :inner_html_at
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
govkit-0.1.0 lib/gov_kit/search_engines/google_news.rb