lib/grabepg/grab_tvsou.rb in grab_epg-0.2.1 vs lib/grabepg/grab_tvsou.rb in grab_epg-0.2.3
- old
+ new
@@ -76,10 +76,11 @@
channellist
}
doc = @grabbase.get_doc_with_proxy(@proxy_list,@home_page)
+ begin
doc.css("li").each do |li|
case ChannelTypeMap[li.get_attribute("class")]
when "央视"
@channels.merge!(get_channellist.call(li,"CCTV"))
when "卫视"
@@ -88,10 +89,19 @@
when "地方"
end
end
+ @error_num=0
+ rescue
+ unless @error_num
+ @error_num = 0
+ end
+ @error_num+=1
+ raise err.to_s if @error_num==5
+ dispose_home_page
+ end
return @channels
end
#获取频道列表
@@ -121,11 +131,21 @@
#根据URL解析时间表页面
def dispose_schedule_page(url,start_time,use_time)
url = @site +"/"+url
urls = url.split("?")
+ begin
doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
+ @error_num = 0
+ rescue => err
+ unless @error_num
+ @error_num = 0
+ end
+ @error_num+=1
+ raise err.to_s if @error_num==5
+ dispose_schedule_page(url,start_time,use_time)
+ end
_url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
_url = urls[0]+_url
urls = dispose_href_schedule_data(_url,start_time,use_time)
ret = {}
last_time = -5
@@ -133,11 +153,13 @@
urls.each do |url|
p "Grab url: #{url}"
if url
doc = @grabbase.get_doc_with_proxy(@proxy_list,url[:url])
schedules = []
- doc.css('div[class="time"]')[0].css("li[class='gray']").each do |schedule|
+ div = doc.css('div[class="time"]')[0]
+ if div
+ div.css("li[class='gray']").each do |schedule|
begin
_dispose = schedule.content
_dispose_show =schedule.css("span")[0].text
time = _dispose.gsub(_dispose_show,"")
_url = @site+"/" + schedule.css('a')[0].get_attribute("href") if schedule.css('a')[0]
@@ -151,10 +173,13 @@
rescue => err
p "Schedule: #{schedule}"
end
end
ret.merge!({url[:date]=>schedules})
+ else
+ p "Error In this url: #{url} couldn't get doc.css('div[class=time]')[0]"
+ end
end
end
return ret
end
@@ -163,14 +188,24 @@
#解析节目详情页面
# Parses a show detail page and extracts the show's name, image URL and
# description.
#
# The page is fetched through @grabbase.get_doc_with_proxy; the returned
# document is queried with CSS selectors (presumably a Nokogiri document --
# confirm against @grabbase). When the expected elements are missing the page
# is fetched and parsed again. Failures are counted in @error_num, which the
# other dispose_* methods also read and reset, so the limit applies to
# consecutive failures across those methods.
#
# @param url [String] URL of the show detail page
# @return [Hash] { show_name:, img_url:, show_info: }
# @raise [RuntimeError] carrying the last parse error's message once five
#   consecutive failures have been counted
def dispose_show_info(url)
  loop do
    # The fetch stays outside the begin block on purpose: fetch errors
    # propagate to the caller untouched, only parse errors are retried.
    doc = @grabbase.get_doc_with_proxy(@proxy_list, url)
    begin
      show_name = doc.css('div[class="tv_info_top"]')[0].content
      info = doc.css("div[class='tv_info']")
      img_url = info.css("img")[0].get_attribute("src").gsub(" ", "")
      show_info = info.css("p")[0].content.gsub("[全文]", "")
      @error_num = 0
      return { show_name: show_name, img_url: img_url, show_info: show_info }
    rescue => err
      @error_num = (@error_num || 0) + 1
      if @error_num >= 5
        # Reset before giving up. Otherwise the counter stays at the limit,
        # an equality check never fires again, and the next failure would
        # retry without bound.
        @error_num = 0
        raise err.to_s
      end
    end
  end
end
end
\ No newline at end of file