grab_tvsou.rb in grab_epg-0.2.4

- old
+ new

@@ -34,23 +34,31 @@
       @grabbase = GrabBase.new
       @channels = {}
       @site="http://m.tvsou.com"
     end
 
+    def get_proxy_list
       # Reader for the proxy list: evaluates to the current value of
       # @proxy_list (nil if that instance variable was never assigned).
+      @proxy_list
+    end
+
     #获取从tvsou的什么网站上获取
     #type: mobile,webpage
     # Looks up the tvsou entry-page URL for the requested site flavour.
     #
     # type - site flavour selector; only the String "mobile" is recognised.
     #
     # Returns the mobile index URL when type is "mobile", otherwise nil.
     def get_url(type)
       mobile_requested = type.eql?("mobile")
       mobile_requested ? "http://m.tvsou.com/index.asp" : nil
     end
 
     def get_data_year_month_day(time)
       # Builds a two-entry hash describing `time`:
       #   :time - "<year>-<month>-<day>"
       #   :date - "<weekday label>(<month>-<day>)", where the weekday label is
       #           whatever @grabbase.conversion_what_day returns for time.wday.
       # `time` must respond to year, month, day and wday.
 
       # Zero-pad the month to two characters (e.g. "3" becomes "03").
-      month = time.month.to_s
+      month=time.month.to_s
       if month.length<2
-        month = "0"+month
+        month="0"+month
       end
-      return {time:"#{time.year}-#{time.month}-#{time.day}",date:"#{@grabbase.conversion_what_day(time.wday)}(#{month}-#{time.day})"}
       # The new side of this change also zero-pads the day and uses the padded
       # value in both strings.
+      day = time.day.to_s
+      if day.length<2
+      	day = "0"+day
+      end
       # NOTE(review): :time still interpolates the raw time.month, so the month
       # is unpadded there while the day is padded (e.g. "2014-3-05"); only :date
       # uses the padded `month`. Confirm whether that asymmetry is intended.
+      return {time:"#{time.year}-#{time.month}-#{day}",date:"#{@grabbase.conversion_what_day(time.wday)}(#{month}-#{day})"}
     end
 
     #获取时间
     #start_time 时间起始点
     #use_time   天数
@@ -109,10 +117,32 @@
     #地方需要调用此函数
     # Empty placeholder: accepts a page URL and a channel type, performs no
     # work, and evaluates to nil.
     def dispose_channel_page(url, channel_type)
       nil
     end
 
+    #获取频道图标地址
+    # url 手机表的URL值
+    # channel_type 频道类型
+    # no_dis 直接使用URL 不处理
+    def get_channel_logo(_url,channel_type,no_dis=false)
       # Fetches a channel page and returns the "src" attribute of the first
       # <img> found inside the element matched by div[id='epg_m1'].
       #
       # _url         - source URL; used verbatim when no_dis is truthy, otherwise
       #                only its "TVid=" and "Channelid=" values are extracted.
       # channel_type - "CCTV" or "WTV"; selects which epg.tvsou.com path is built.
       # no_dis       - when truthy, skip the URL rewriting and fetch _url directly.
+      if no_dis
+        url = _url
+      else
         # Take the text after "TVid=" / "Channelid=" up to the next "&".
         # NOTE(review): if either marker is absent, split returns a one-element
         # array, [1] is nil and the following .split raises NoMethodError
         # (assuming _url is a String) -- confirm callers always pass both.
+        tvs = _url.split("TVid=")
+        tvid = tvs[1].split("&")[0]
+        channelids = _url.split("Channelid=")
+        channelid = channelids[1].split("&")[0]
+        if channel_type=="CCTV"
+          url = "http://epg.tvsou.com/programys/TV_#{tvid}/Channel_#{channelid}/W1.htm"
+        elsif channel_type=="WTV"
+          url = "http://epg.tvsou.com/programws/TV_#{tvid}/Channel_#{channelid}/W1.htm"
+        end
         # NOTE(review): any other channel_type leaves `url` nil here, and that
         # nil is what gets passed to get_doc_with_proxy below.
+      end
       # Page is retrieved through @grabbase using the stored proxy list.
+      doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
       # NOTE(review): presumably `doc` is a Nokogiri-style document; if no <img>
       # matches, [0] is nil and get_attribute raises NoMethodError -- verify.
+      logo_network_path=doc.css("div[id='epg_m1']").css("img")[0].get_attribute("src")
+      return logo_network_path
+    end
 
 
     #获取频道时间表URL
     def dispose_href_schedule_data(href,start_time,use_time)
      hrefs=href.split("&programDT=")
@@ -129,24 +159,24 @@
      ret
     end
 
    #根据URL解析时间表页面
    def dispose_schedule_page(url,start_time,use_time)
-     url = @site +"/"+url
+     url = url
      urls = url.split("?")
      begin
      doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
      @error_num = 0
+     _url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
      rescue => err
        unless @error_num
          @error_num = 0
        end
        @error_num+=1
        raise err.to_s  if @error_num==5
        dispose_schedule_page(url,start_time,use_time)
      end
-     _url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
      _url = urls[0]+_url
      urls = dispose_href_schedule_data(_url,start_time,use_time)
      ret = {}
      last_time = -5
      last_schedule = {}
@@ -160,11 +190,12 @@
          div.css("li[class='gray']").each do |schedule|
            begin
              _dispose = schedule.content
              _dispose_show =schedule.css("span")[0].text
              time = _dispose.gsub(_dispose_show,"")
-             _url = @site+"/" + schedule.css('a')[0].get_attribute("href") if schedule.css('a')[0]
+             href =schedule.css('a')[schedule.css('a').count-1].get_attribute("href")
+             _url = @site+"/" + href if schedule.css('a')[0]
              schedules << {time:time,schedule_name:_dispose_show.delete(" 剧情"),url:_url}
              now = time.gsub(":","").to_i
              if((now-last_time)<5)
                schedules.delete(last_schedule)
              end
@@ -188,10 +219,18 @@
 
 
     #解析节目详情页面
     def dispose_show_info(url)
       doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
+      if doc.nil?
+        unless @error_num
+          @error_num = 0
+        end
+        @error_num+=1
+        raise err.to_s  if @error_num==5
+        dispose_show_info(url)
+      end
       begin
       show_name = doc.css('div[class="tv_info_top"]')[0].content
       _doc=doc.css("div[class='tv_info']")
       img_url = _doc.css("img")[0].get_attribute("src").gsub(" ","")
       show_info = _doc.css("p")[0].content.gsub("[全文]","")
@@ -208,6 +247,6 @@
     end
 
 
   end
 
-end
\ No newline at end of file
+end