lib/grabepg/grab_tvsou.rb in grab_epg-0.2.3 vs lib/grabepg/grab_tvsou.rb in grab_epg-0.2.4

- old
+ new

@@ -34,23 +34,31 @@
     @grabbase = GrabBase.new
     @channels = {}
     @site="http://m.tvsou.com"
   end
+  def get_proxy_list
+    @proxy_list
+  end
+
   #获取从tvsou的什么网站上获取
   #type: mobile,webpage
   def get_url(type)
     return "http://m.tvsou.com/index.asp" if type.eql?("mobile")
   end
   def get_data_year_month_day(time)
-    month = time.month.to_s
+    month=time.month.to_s
     if month.length<2
-      month = "0"+month
+      month="0"+month
     end
-    return {time:"#{time.year}-#{time.month}-#{time.day}",date:"#{@grabbase.conversion_what_day(time.wday)}(#{month}-#{time.day})"}
+    day = time.day.to_s
+    if day.length<2
+      day = "0"+day
+    end
+    return {time:"#{time.year}-#{time.month}-#{day}",date:"#{@grabbase.conversion_what_day(time.wday)}(#{month}-#{day})"}
   end
   #获取时间
   #start_time 时间起始点
   #use_time 天数
@@ -109,10 +117,32 @@
   #地方需要调用此函数
   def dispose_channel_page(url,channel_type)
   end
+  #获取频道图标地址
+  # url 手机表的URL值
+  # channel_type 频道类型
+  # no_dis 直接使用URL 不处理
+  def get_channel_logo(_url,channel_type,no_dis=false)
+    if no_dis
+      url = _url
+    else
+      tvs = _url.split("TVid=")
+      tvid = tvs[1].split("&")[0]
+      channelids = _url.split("Channelid=")
+      channelid = channelids[1].split("&")[0]
+      if channel_type=="CCTV"
+        url = "http://epg.tvsou.com/programys/TV_#{tvid}/Channel_#{channelid}/W1.htm"
+      elsif channel_type=="WTV"
+        url = "http://epg.tvsou.com/programws/TV_#{tvid}/Channel_#{channelid}/W1.htm"
+      end
+    end
+    doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
+    logo_network_path=doc.css("div[id='epg_m1']").css("img")[0].get_attribute("src")
+    return logo_network_path
+  end
   #获取频道时间表URL
   def dispose_href_schedule_data(href,start_time,use_time)
     hrefs=href.split("&programDT=")
@@ -129,24 +159,24 @@
     ret
   end
   #根据URL解析时间表页面
   def dispose_schedule_page(url,start_time,use_time)
-    url = @site +"/"+url
+    url = url
     urls = url.split("?")
     begin
       doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
       @error_num = 0
+      _url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
     rescue => err
       unless @error_num
         @error_num = 0
       end
       @error_num+=1
       raise err.to_s if @error_num==5
       dispose_schedule_page(url,start_time,use_time)
     end
-    _url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
     _url = urls[0]+_url
     urls = dispose_href_schedule_data(_url,start_time,use_time)
     ret = {}
     last_time = -5
     last_schedule = {}
@@ -160,11 +190,12 @@
       div.css("li[class='gray']").each do |schedule|
         begin
           _dispose = schedule.content
           _dispose_show =schedule.css("span")[0].text
           time = _dispose.gsub(_dispose_show,"")
-          _url = @site+"/" + schedule.css('a')[0].get_attribute("href") if schedule.css('a')[0]
+          href =schedule.css('a')[schedule.css('a').count-1].get_attribute("href")
+          _url = @site+"/" + href if schedule.css('a')[0]
           schedules << {time:time,schedule_name:_dispose_show.delete(" 剧情"),url:_url}
           now = time.gsub(":","").to_i
           if((now-last_time)<5)
             schedules.delete(last_schedule)
           end
@@ -188,10 +219,18 @@
   #解析节目详情页面
   def dispose_show_info(url)
     doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
+    if doc.nil?
+      unless @error_num
+        @error_num = 0
+      end
+      @error_num+=1
+      raise err.to_s if @error_num==5
+      dispose_show_info(url)
+    end
     begin
       show_name = doc.css('div[class="tv_info_top"]')[0].content
       _doc=doc.css("div[class='tv_info']")
       img_url = _doc.css("img")[0].get_attribute("src").gsub(" ","")
       show_info = _doc.css("p")[0].content.gsub("[全文]","")
@@ -208,6 +247,6 @@
     end
   end
-end
\ No newline at end of file
+end