lib/grabepg/grab_tvsou.rb in grab_epg-0.2.1 vs lib/grabepg/grab_tvsou.rb in grab_epg-0.2.3

- old
+ new

@@ -76,10 +76,11 @@ channellist } doc = @grabbase.get_doc_with_proxy(@proxy_list,@home_page) + begin doc.css("li").each do |li| case ChannelTypeMap[li.get_attribute("class")] when "央视" @channels.merge!(get_channellist.call(li,"CCTV")) when "卫视" @@ -88,10 +89,19 @@ when "地方" end end + @error_num=0 + rescue + unless @error_num + @error_num = 0 + end + @error_num+=1 + raise err.to_s if @error_num==5 + dispose_home_page + end return @channels end #获取频道列表 @@ -121,11 +131,21 @@ #根据URL解析时间表页面 def dispose_schedule_page(url,start_time,use_time) url = @site +"/"+url urls = url.split("?") + begin doc = @grabbase.get_doc_with_proxy(@proxy_list,url) + @error_num = 0 + rescue => err + unless @error_num + @error_num = 0 + end + @error_num+=1 + raise err.to_s if @error_num==5 + dispose_schedule_page(url,start_time,use_time) + end _url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href") _url = urls[0]+_url urls = dispose_href_schedule_data(_url,start_time,use_time) ret = {} last_time = -5 @@ -133,11 +153,13 @@ urls.each do |url| p "Grab url: #{url}" if url doc = @grabbase.get_doc_with_proxy(@proxy_list,url[:url]) schedules = [] - doc.css('div[class="time"]')[0].css("li[class='gray']").each do |schedule| + div = doc.css('div[class="time"]')[0] + if div + div.css("li[class='gray']").each do |schedule| begin _dispose = schedule.content _dispose_show =schedule.css("span")[0].text time = _dispose.gsub(_dispose_show,"") _url = @site+"/" + schedule.css('a')[0].get_attribute("href") if schedule.css('a')[0] @@ -151,10 +173,13 @@ rescue => err p "Schedule: #{schedule}" end end ret.merge!({url[:date]=>schedules}) + else + p "Error In this url: #{url} couldn't get doc.css('div[class=time]')[0]" + end end end return ret end @@ -163,14 +188,24 @@ #解析节目详情页面 def dispose_show_info(url) doc = @grabbase.get_doc_with_proxy(@proxy_list,url) + begin show_name = doc.css('div[class="tv_info_top"]')[0].content _doc=doc.css("div[class='tv_info']") img_url = _doc.css("img")[0].get_attribute("src").gsub(" ","") show_info = _doc.css("p")[0].content.gsub("[全文]","") + @error_num = 0 {show_name:show_name,img_url:img_url,show_info:show_info} + rescue => err + unless @error_num + @error_num = 0 + end + @error_num+=1 + raise err.to_s if @error_num==5 + dispose_show_info(url) + end end end \ No newline at end of file