Sha256: 6b76b356f2e2d0b817c7d1f8d9ef8e3a0303407a7ca9a9e7489290d93f3ce08a

Contents?: true

Size: 1.64 KB

Versions: 21

Compression:

Stored size: 1.64 KB

Contents

require 'net/http'
require 'open-uri'
require 'time'

require 'nokogiri'

module Scrapers
  # Scraper for xkcd comic pages.
  module Xkcd
    XKCD_URL = "http://xkcd.com".freeze
    PUBDATE_FORMAT = "%F".freeze

    # Get the current or numbered xkcd comic
    #
    # +comic+ = (string) the number of the xkcd comic to
    # retrieve. Gets current comic if nil.
    #
    # returns hash containing comic info:
    #
    #      {:url => "url to comic",
    #       :img_src => "source url to comic image (as written in the page)",
    #       :img_title => "the image's title attribute",
    #       :hover_text => "the hover (mouse-over) text (same as :img_title)",
    #       :img_alt => "the image's alt attribute",
    #       :title => "comic's title (same as :img_alt)",
    #       :pubdate => "publication date",
    #      }
    #
    # Raises RuntimeError when the page contains no "#comic img" element.
    def self.scrape(comic=nil)
      url = URI.parse(XKCD_URL)
      url.path = "/#{comic}/" unless comic.nil?

      # URI::HTTP#open is provided by open-uri. Unlike a bare Kernel#open it
      # still fetches URLs on Ruby 3.0+, and the block form closes the
      # underlying IO once the document has been parsed.
      doc = url.open { |page| Nokogiri::HTML(page) }

      img = doc.at_css("#comic img")
      raise "no comic image found at #{url}" if img.nil?

      img_src = img.attr("src")
      hover_text = img.attr("title")
      alt_text = img.attr("alt")

      {
        url: url.to_s,
        img_src: img_src,
        img_title: hover_text,
        hover_text: hover_text,
        img_alt: alt_text,
        title: alt_text,
        pubdate: get_pubdate(img_src),
      }
    end

    # Get the http header of the image file which reveals the last_modified date.
    # We'll use this as the publication date.
    #
    # +url+ = (string) url of the image. Relative and scheme-less
    # ("//host/path") urls are resolved against XKCD_URL.
    #
    # returns the Last-Modified date formatted with PUBDATE_FORMAT, or
    # today's date when the header is missing or cannot be parsed.
    def self.get_pubdate(url)
      # Net::HTTP::Head only accepts a URI::HTTP, so give scheme-less
      # references a scheme and host by joining them onto the site url.
      uri = URI.join(XKCD_URL, url.to_s)
      head_req = Net::HTTP::Head.new(uri)

      head = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
        http.request(head_req)
      end

      parse_last_modified(head["Last-Modified"]).strftime(PUBDATE_FORMAT)
    end

    # Turn a Last-Modified header value into a Time, falling back to the
    # current time when the value is nil or is not a parseable date.
    def self.parse_last_modified(value)
      return Time.now if value.nil?

      Time.parse(value)
    rescue ArgumentError
      Time.now
    end
    private_class_method :parse_last_modified
  end
end

Version data entries

21 entries across 21 versions & 1 rubygems

Version Path
scrapers-3.2.0 lib/scrapers/xkcd.rb
scrapers-3.1.0 lib/scrapers/xkcd.rb
scrapers-3.0.0 lib/scrapers/xkcd.rb
scrapers-2.1.0 lib/scrapers/xkcd.rb
scrapers-2.0.2 lib/scrapers/xkcd.rb
scrapers-2.0.1 lib/scrapers/xkcd.rb
scrapers-2.0.0 lib/scrapers/xkcd.rb
scrapers-1.5.6 lib/scrapers/xkcd.rb
scrapers-1.5.5 lib/scrapers/xkcd.rb
scrapers-1.5.4 lib/scrapers/xkcd.rb
scrapers-1.5.3 lib/scrapers/xkcd.rb
scrapers-1.5.2 lib/scrapers/xkcd.rb
scrapers-1.5.1 lib/scrapers/xkcd.rb
scrapers-1.5.0 lib/scrapers/xkcd.rb
scrapers-1.4.0 lib/scrapers/xkcd.rb
scrapers-1.3.0 lib/scrapers/xkcd.rb
scrapers-1.2.0 lib/scrapers/xkcd.rb
scrapers-1.1.0 lib/scrapers/xkcd.rb
scrapers-1.0.0 lib/scrapers/xkcd.rb
scrapers-0.4.3 lib/scrapers/xkcd.rb