Sha256: 65a8df9d6de238bba116ca69b0a147c58401d1e8fdee5db395c0521c80e329ac

Contents?: true

Size: 1.92 KB

Versions: 1

Compression:

Stored size: 1.92 KB

Contents

require 'cgi'
require 'hpricot'
require 'open-uri'
require 'htmlentities'

# Looks up a movie on IMDb and exposes its title, year, score and IMDb id.
#
# The page is taken from an explicit +link+ when one is given; otherwise IMDb
# is searched for "name (year)". Lookups are best effort: when the page cannot
# be fetched, or does not contain the expected markup, the readers fall back
# to the values passed to the constructor (0 for the score) instead of raising.
class IMDB

  attr_accessor :link, :doc, :id

  # name:: movie title, used for the search query and as fallback for #name
  # year:: release year, used for the search query and as fallback for #year
  # link:: optional URL of the movie page; when given, no search is performed
  #
  # Fetches the page right away (up to three retries after a failure).
  def initialize(name, year = nil, link = nil)
    @try = 3
    @name, @year, @link = name, year, link
    @coder = HTMLEntities.new
    set_doc
    set_id
  end

  # Rating parsed from the "x/10" text of the first "div.meta b" element, as
  # a Float. Returns 0 when there is no page, no such element, or the element
  # does not contain an "x/10" rating.
  def score
    return 0 unless @doc
    rating_node = @doc.search("div.meta b").first
    return 0 unless rating_node
    rating = rating_node.inner_html.to_s[/(.*)\/10/, 1]
    rating ? rating.to_f : 0
  end

  # Four-digit year found in parentheses in the page title, as an Integer.
  # Falls back to the constructor's year when there is no page or the title
  # carries no year.
  def year
    from_title = page_title[/\s\(([0-9]{4})/, 1]
    from_title ? from_title.to_i : @year.to_i
  end

  # Entity-decoded part of the page title that precedes the last " (".
  # Falls back to the constructor's name when there is no page or the title
  # has no such part.
  def name
    # $KCODE is only honoured by Ruby 1.8; newer interpreters ignore it (and
    # may warn about the assignment), so leave the global alone there.
    $KCODE = 'utf-8' if RUBY_VERSION < '1.9'
    from_title = page_title[/(.*)\s\(/u, 1]
    from_title ? @coder.decode(from_title) : @name
  end

private

  # Loads the movie page into @doc, and @id when the URL reveals it.
  # Any StandardError triggers another attempt (printing '*' to stdout) until
  # the @try budget is used up; after that @doc is left nil.
  def set_doc
    if @link
      @doc = fetch_page(@link.gsub(/\/\s*$/,''))
      # A link without a "tt…" id is not an error: set_id reads it off the page.
      @id = extract_id(@link)
    else
      @doc = search_for_page
    end
  rescue StandardError
    if @try > 0
      @try -= 1
      $stdout.print '*'
      retry
    else
      @doc = nil
    end
  end

  # Searches IMDb for "name (year)" and returns the parsed movie page, or nil
  # when the result page lists no title.
  def search_for_page
    query = "#{@name} (#{@year})"
    results = fetch_page("http://www.imdb.com/find?q=#{CGI::escape(query)}")
    case results.search("title").inner_html
    when "IMDb Title Search", "IMDb Search" # search result page
      path = first_result_path(results)
      return nil unless path
      movie_url = "http://www.imdb.com#{path}"
      page = fetch_page(movie_url)
      @id = extract_id(movie_url)
      page
    else # direct in movie page
      results
    end
  end

  # href of the first link matched by the result selectors, tried in order of
  # preference; nil when none of them matches.
  def first_result_path(results)
    selectors = [
      "b[text()*='Media from'] a",
      "td[@valign='top'] a[@href^='/title/tt']"
    ]
    selectors.each do |selector|
      anchor = results.search(selector).first
      return anchor[:href] if anchor
    end
    nil
  end

  # Downloads +url+ and parses it with Hpricot.
  def fetch_page(url)
    # open-uri offers URI.open from Ruby 2.5 on, and Kernel#open stopped
    # handling URLs in Ruby 3.0; older interpreters only have Kernel#open.
    io = URI.respond_to?(:open) ? URI.open(url) : open(url)
    Hpricot(io)
  end

  # Title of the loaded page, or "" when there is no page or no title.
  def page_title
    @doc ? @doc.search("title").inner_html.to_s : ""
  end

  # First "tt<digits>" id found in +text+, or nil.
  def extract_id(text)
    text.to_s[/tt[0-9]+/]
  end

  # Fills in @id from the first "/title/tt…" link on the page, unless the id
  # is already known or there is no such link.
  def set_id
    return if @id || !doc
    anchor = doc.search("a[@href*='/title/tt']").first
    @id = extract_id(anchor[:href]) if anchor
  end

end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
pirate-autonzb-0.4.4 lib/imdb.rb