#encoding:utf-8

require 'nokogiri'
require 'open-uri'
require 'timeout'

module Grabepg
  # To change this template use File | Settings | File Templates.


  # Fetching an image: Net::HTTP.get(url)
  # Determining an image's file type: (left blank in the original note)

  # Readers for the scraper state. These are instance-level readers (usable by
  # objects that include this module); the module-level methods below keep the
  # same-named state in the module's own instance variables.
  attr_reader :channel  # channel list
  attr_reader :site # base URL of the site
  attr_reader :proxyindex # index of the proxy currently in use
  attr_reader :show_schedule # broadcast schedules, keyed by show
  attr_reader :img_down_path # where image download paths are stored

  # Channel categories whose listing pages are scraped by getchannels.
  DEFAULT_GrabtvType=["cctv","satellite","digital",]
  # Default base URL of the scraped site.
  DEFAULT_SITE = "http://www.tvmao.com"


# Convert a weekday number into its Chinese name ("星期" + day character).
#
# whatday - weekday index, normally the value of Time#wday
#           (0 = Sunday, 1 = Monday .. 6 = Saturday). Anything responding to
#           #to_i is accepted.
#
# Time#wday reports Sunday as 0, which used to fall through the lookup and
# produce a bare "星期"; 0 now maps to "日". The legacy value 7 still yields
# "七" so existing callers that pass 7 see no change.
# NOTE(review): "星期日" is assumed to be the spelling wanted for Sunday -
# confirm against the strings this is compared with.
#
# Returns a new String; nil and unknown numbers yield just "星期".
  def self.conversion_what_day(whatday)
    day_characters = {
      0 => "日",
      1 => "一",
      2 => "二",
      3 => "三",
      4 => "四",
      5 => "五",
      6 => "六",
      7 => "七"
    }
    # nil.to_i is 0, so guard nil explicitly to avoid reporting it as Sunday.
    suffix = whatday.nil? ? "" : day_characters.fetch(whatday.to_i, "")
    "星期" + suffix
  end

  # Render a value as text, prefixing a single "0" when the text is shorter
  # than two characters (so 1..9 become "01".."09").
  #
  # num - any object; it is converted with #to_s.
  #
  # Returns the (possibly prefixed) String.
  def self.dispose_time(num)
    text = num.to_s
    text.length < 2 ? "0" + text : text
  end

  # Format a time as "<weekday name>(<MM>-<DD>)".
  #
  # time - object responding to #month, #day and #wday (e.g. a Time).
  #
  # The weekday name comes from conversion_what_day and both numbers are
  # zero-padded with dispose_time.
  def self.get_week_date_time(time)
    month_number = time.month
    day_number = time.day
    weekday_name = conversion_what_day(time.wday)
    padded_month = dispose_time(month_number)
    padded_day = dispose_time(day_number)
    "#{weekday_name}(#{padded_month}-#{padded_day})"
  end

  # Number of seconds spanned by the given number of days, i.e. the amount to
  # subtract from a Time to go that many days back.
  #
  # day_num - number of days.
  def self.del_day_num(day_num)
    day_num * 60 * 60 * 24
  end

  # Formatted date (see get_week_date_time) of the day lying `num` days before
  # the current time.
  #
  # num - how many days to go back from Time.now.
  def self.get_time_day_prior(num)
    get_week_date_time(Time.now - del_day_num(num))
  end

  # Dates of the past week that should be deleted.
  #
  # Only produces entries when today is Monday (Time#wday == 1): the formatted
  # dates for today and each of the seven preceding days (offsets 0 through 7).
  # On any other weekday the list is empty.
  def self.del_time_list
    return [] unless Time.now.wday == 1

    (0..7).map { |days_back| self.get_time_day_prior(days_back) }
  end


  # Example driver showing how the module is meant to be used: scrape the
  # channel list into "/home/zql", pick proxies that answer within 5 seconds,
  # then fetch and print the schedule of every channel.
  def self.start
    @channel = []
    @site = DEFAULT_SITE
    channels = self.getchannels("/home/zql")
    fast_proxies = get_topfast_list(5)
    p self.img_down_path
    channels["channel_urls"].each do |channel_id, channel_href|
      schedule = getschedule(channel_id, channel_href, fast_proxies)
      p "****************************************GetSchedule : #{schedule}"
    end
  end

  # Module-level reader for @img_down_path.
  # NOTE(review): no method in this file assigns @img_down_path (getchannels
  # stores the directory in @img_down_dir_path), so this looks like it always
  # returns nil - confirm which variable was intended.
  def self.img_down_path
    stored_path = @img_down_path
    stored_path
  end


  # Scrape the channel listing pages of the site, one per DEFAULT_GrabtvType.
  #
  # img_dir_path - directory in which the file "channel_img_down_path" is
  #                (re)written with one "<channel_id>:<logo url>" line per
  #                channel.
  #
  # Side effects: resets @channel, @site and @proxyindex, remembers the
  # directory in @img_down_dir_path and prints progress with `p`.
  #
  # Changes from the previous version:
  # * the output file is closed even when scraping raises;
  # * pages are fetched with URI#open (as `gg` already does) because
  #   Kernel#open no longer opens URLs on Ruby 3;
  # * table cells whose link does not yield a channel id are skipped instead
  #   of raising NoMethodError or being recorded under a nil key.
  #
  # Returns {"channel_info" => {id => details}, "channel_urls" => {id => href}}.
  def self.getchannels(img_dir_path)
    @channel = []
    @site=DEFAULT_SITE
    @proxyindex = 0
    @img_down_dir_path = img_dir_path
    @img_down_file = File.new(File.join(img_dir_path,"channel_img_down_path"),'w+')

    channel_urls = {}
    channel_info = {}

    begin
      DEFAULT_GrabtvType.each do |type|
        next if type.nil? || type.empty?
        url = @site + "/program/duration/#{type}/w1.html"
        p url
        doc = URI.parse(url).open { |page| Nokogiri::HTML(page) }
        p doc.content
        p "*************************************************************"
        doc.css('td[class="tdchn"]').each do |td|
          channel_name = td.content
          # When a cell holds several links the last one wins, as before.
          link = td.css('a').to_a.last
          herf = link ? link['href'].to_s : ""
          # The id is the second dash-separated token of the third path
          # segment (presumably hrefs look like "/program/<group>-<id>-w1.html").
          channel_id = herf.split("/")[2].to_s.split("-")[1]
          next if channel_id.nil?

          # Address of the channel logo.
          img_path = "http://static.haotv.me/channel/logo/#{channel_id}.jpg"
          @img_down_file.puts("#{channel_id}:#{img_path}")
          @channel<<({channel_id=>{name:channel_name,herf:herf,type:type}})
          channel_info[channel_id] = {"channel_name"=>channel_name,"channel_type"=>type,"channel_id"=>channel_id,"img_path"=>img_path}
          channel_urls[channel_id] = herf
        end
      end
    ensure
      @img_down_file.close unless @img_down_file.closed?
    end
    p "Channel: #{@channel}"
    {"channel_info"=>channel_info,"channel_urls"=>channel_urls}
  end

  # Fetch `url` and parse it with Nokogiri, going through one of the proxies.
  #
  # proxylist - Array of "host:port" strings; nil or an empty list means a
  #             direct connection.
  # url       - absolute URL to fetch.
  #
  # Proxies are used round-robin via @proxyindex. A failed attempt is retried
  # with the next proxy, up to 4 attempts in total.
  #
  # Changes from the previous version:
  # * the retry counter no longer starts out as nil (the first failure used to
  #   raise NoMethodError from `@no_firest += 1`);
  # * the document obtained by a retry is actually returned (the recursive
  #   call's result used to be thrown away, yielding nil);
  # * an empty or nil proxy list no longer raises ZeroDivisionError;
  # * a retry moves on to the next proxy instead of reusing the failing one;
  # * the page is fetched with URI#open, which still works on Ruby 3, and the
  #   underlying IO is closed after parsing.
  #
  # Returns the Nokogiri document.
  # Raises RuntimeError ("Error: <cause>") once all attempts have failed.
  def self.get_doc_with_proxy(proxylist,url)
    proxies = Array(proxylist)
    @proxyindex ||= 0
    attempts = 0
    proxy = nil
    doc = nil
    begin
      unless proxies.empty?
        @proxyindex = @proxyindex % proxies.size
        proxy = proxies[@proxyindex]
      end
      options = {}
      options[:proxy] = "http://#{proxy}" unless proxy.nil? || proxy.empty?
      doc = URI.parse(url).open(options) { |page| Nokogiri::HTML(page) }
    rescue => err
      attempts += 1
      p "*************************Proxy:#{proxy}, url:#{url}"
      raise RuntimeError,"Error: #{err.to_s}" unless attempts < 4
      @proxyindex += 1 # rotate so the retry uses a different proxy
      retry
    end
    @proxyindex += 1
    unless doc
      p "*************************Proxy:#{proxy}, url:#{url}"
    end
    doc
  end

  # Scrape the programme list of one channel for one day.
  #
  # channel   - channel id (only used for logging).
  # url       - URL of the day's schedule page.
  # proxylist - proxies handed to get_doc_with_proxy.
  #
  # Side effect: on success @date is set to "<week>(<date>)" as read from the
  # page header; callers use it as the key for the returned list.
  #
  # Changes from the previous version:
  # * dropped `img_url = _img_url + ...`: `_img_url` is not defined in this
  #   method, so every call raised NameError, and the value was never used;
  # * type/name/image are reset for every entry, so an entry without a detail
  #   link no longer inherits the type of the previous show;
  # * a non-Hash result from get_show_infomation (its error path) is ignored
  #   rather than indexed;
  # * a page lacking the expected header or list returns [] and leaves @date
  #   untouched, so the caller simply skips that day.
  #
  # Returns an Array of Hashes with the keys "schedule_name", "schedule_logo",
  # "schedule_start", "show_infomation_herf", "type" and "name".
  def self.get_schedulelist_atday(channel,url,proxylist)
    p "Grab: #{url}"
    doc = get_doc_with_proxy(proxylist,url)
    return [] unless doc

    header = doc.css('div[class="mt10 clear"]')[0]
    listing = doc.css('ul[id="pgrow"]')[0]
    return [] unless header && listing

    date, week = header.content.split(" ")
    p "Channel: #{channel}  Date: #{date} Week: #{week}"
    @date = "#{week}(#{date})"

    schedule_list = []
    listing.css("li").each do |item|
      # Prefer a direct child link; otherwise fall back to the "drama" link
      # with its "/episode/section..." suffix cut off.
      schedule_herf = nil
      link = item.xpath('a[@href]')[0]
      if link
        schedule_herf = link.get_attribute("href")
      else
        drama = item.css('a[class="drama"]')[0]
        schedule_herf = drama.get_attribute("href").to_s.split("/episode/section")[0] if drama
      end

      fields = item.content.split(" ")
      next unless fields.size > 1
      time = fields[0]
      schedule_name = fields[1]

      show_type = []
      show_name = ""
      show_img = nil
      unless schedule_herf.nil? || schedule_herf.empty?
        show_infomation = get_show_infomation(proxylist,schedule_herf)
        if show_infomation.is_a?(Hash)
          show_type = show_infomation["type"]
          show_name = show_infomation["name"]
          show_img = show_infomation["img"]
        end
      end
      p "Time: #{time} schedule: #{schedule_name} show_infomation_herf: #{schedule_herf}  type: #{show_type} name: #{show_name} img:#{show_img}"
      schedule_list << {"schedule_name"=>schedule_name,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
    end
    schedule_list
  end

  # Build the schedule page URLs for a run of days.
  #
  # url        - channel href; everything up to and including its second
  #              dash-separated token is kept as the prefix.
  # start_time - Integer offset of the first day relative to today (positive =
  #              later, negative = earlier).
  # day_num    - Integer number of days to cover, counted from the start day.
  # base_site  - optional site root; defaults to @site, then DEFAULT_SITE.
  #
  # This used to exist only as an instance method, yet getScheduleAssignDate
  # calls it on the module itself, which raised NoMethodError. It is now a
  # module-level method; the instance method below is kept as a thin wrapper
  # so objects that include the module keep working.
  #
  # NOTE(review): the day index is Time#wday based, so on Sundays (wday 0)
  # with start_time 0 the first page is "w0.html", and the range is inclusive
  # of start + day_num (day_num + 1 pages). Confirm both against the site.
  #
  # Returns an Array of URL strings "<site><prefix>-w<N>.html".
  def self.get_assign_date_url(url,start_time,day_num,base_site=nil)
    base = base_site || @site || DEFAULT_SITE
    tokens = url.split("-")
    prefix = "#{base}#{tokens[0]}-#{tokens[1]}-"

    today = Time.now.wday
    first_day = today + start_time
    first_day = 1 if first_day < 0
    # Never reach further than one week ahead of today.
    last_day = [first_day + day_num, today + 7].min

    (first_day..last_day).map { |index| "#{prefix}w#{index}.html" }
  end

  # Instance-level entry point kept for includers; uses the includer's `site`.
  def get_assign_date_url(url,start_time,day_num)
    Grabepg.get_assign_date_url(url,start_time,day_num,site)
  end


  # Fetch a channel's schedule for a chosen run of days.
  #
  # channel           - channel id.
  # herf              - channel href as returned by getchannels.
  # proxylist         - proxies used for the requests.
  # start_num         - offset of the first day relative to today.
  # day_num           - number of days; anything that is not a number >= 1
  #                     is treated as 1.
  # img_dir_down_path - directory for the "schedule_img_down_path" file;
  #                     defaults to the directory given to getchannels, or
  #                     else the directory containing this source file.
  #
  # Changes from the previous version:
  # * the fallback directory is File.dirname(__FILE__); joining a file name
  #   onto __FILE__ itself pointed inside a regular file and could not be
  #   opened;
  # * the output file is closed even when scraping raises;
  # * unused locals (site, _img_url) were removed.
  #
  # Returns {"channel_schedule" => {"<week>(<date>)" => entries},
  #          "show_schedule"    => {show name => airings}}.
  def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num,img_dir_down_path=@img_down_dir_path)
    day_num = 1 unless day_num.is_a?(Numeric) && day_num >= 1
    img_dir_down_path ||= File.dirname(__FILE__)

    @img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
    @show_schedule = {}
    channel_schedule = {}

    begin
      get_assign_date_url(herf,start_num,day_num).each do |url|
        # get_schedulelist_atday fills @date in; empty means the page was skipped.
        @date = ""
        schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
        channel_schedule[@date] = schedule_list unless @date.empty?
      end
    ensure
      @img_down_file.close unless @img_down_file.closed?
    end
    {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
  end


  # Fetch a channel's schedule for the first `day_num` days of the week
  # (pages w1 .. w<day_num>). Kept because existing callers rely on it.
  #
  # channel           - channel id.
  # herf              - channel href as returned by getchannels.
  # proxylist         - proxies used for the requests.
  # day_num           - number of day pages to fetch (default 7); anything
  #                     that is not a number >= 1 is treated as 1.
  # img_dir_down_path - directory for the "schedule_img_down_path" file;
  #                     defaults to the directory given to getchannels, or
  #                     else the directory containing this source file.
  #
  # Changes from the previous version:
  # * the fallback directory is File.dirname(__FILE__); joining a file name
  #   onto __FILE__ itself pointed inside a regular file and could not be
  #   opened;
  # * the output file is closed even when scraping raises;
  # * the unused _img_url local was removed.
  #
  # Returns {"channel_schedule" => {"<week>(<date>)" => entries},
  #          "show_schedule"    => {show name => airings}}.
  def self.getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
    p "Day Num is #{day_num}"
    day_num = 1 unless day_num.is_a?(Numeric) && day_num >= 1
    img_dir_down_path ||= File.dirname(__FILE__)

    site = @site || "http://www.tvmao.com"
    # Keep the href up to its second dash-separated token, then append the day page.
    tokens = herf.split("-")
    prefix = "#{site}#{tokens[0]}-#{tokens[1]}-"
    week_urls = (1..day_num).map { |index| "#{prefix}w#{index}.html" }

    @img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
    @show_schedule = {}
    channel_schedule = {}

    begin
      week_urls.each do |url|
        # get_schedulelist_atday fills @date in; empty means the page was skipped.
        @date = ""
        schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
        channel_schedule[@date] = schedule_list unless @date.empty?
      end
    ensure
      @img_down_file.close unless @img_down_file.closed?
    end
    {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
  end


  # Fetch the details of one show: its name, genres and picture.
  #
  # proxy_list    - proxies handed to get_doc_with_proxy.
  # schedule_herf - site-relative path of the show page.
  #
  # Side effects: resets @proxyindex to 0, appends "<name>:<image url>" to
  # @img_down_file when that file is open, and records the show's airings in
  # @show_schedule (once per show name).
  #
  # Changes from the previous version:
  # * on failure a Hash with empty values is returned; before, the method
  #   returned the String produced by `p`, which callers then indexed as if
  #   it were the result Hash;
  # * a missing/closed @img_down_file, an unset @show_schedule or a missing
  #   detail page no longer throws away the information already collected;
  # * the always-true `if doc.css(...)` check and the duplicated genre loops
  #   were folded into simpler code.
  #
  # Returns {"type" => [genres], "name" => String, "img" => String or nil}.
  def self.get_show_infomation(proxy_list,schedule_herf)
    @proxyindex = 0
    @site ||= "http://www.tvmao.com"
    @show_schedule ||= {}

    show_url = @site + schedule_herf
    doc = get_doc_with_proxy(proxy_list,show_url)
    name = doc.css('span[itemprop="name"]')[0].content

    # Picture of the show, when the page has one.
    picture = doc.css('img[class="tvc"]')[0]
    schedule_img_down_path = picture.get_attribute('src') if picture

    # Genres appear as <span> and <a> elements on both the show page and its
    # "/detail" page.
    genres_of = lambda do |page|
      page.css('span[itemprop="genre"]').map(&:content) + page.css('a[itemprop="genre"]').map(&:content)
    end
    type = genres_of.call(doc)
    detail = get_doc_with_proxy(proxy_list,"#{show_url}/detail")
    type.concat(genres_of.call(detail)) if detail
    type.uniq!

    if @img_down_file && !@img_down_file.closed?
      @img_down_file.puts("#{name}:#{schedule_img_down_path}")
    end
    @show_schedule[name] = get_show_schedule(proxy_list,show_url) unless @show_schedule.has_key?(name)
    {"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
  rescue => e
    p "Error In get_show_infomation msg : #{e.to_s}"
    {"type"=>[],"name"=>"","img"=>nil}
  end

  # Fetch the airing times of a show from its "/playingtime" page.
  #
  # proxylist - proxies handed to get_doc_with_proxy.
  # herf      - absolute URL of the show page.
  #
  # The first row of the table is skipped (as before). Each remaining row's
  # time cell is split on whitespace into week, date and time.
  #
  # Robustness: a page without the "epg" container yields [] and rows lacking
  # any of the three cells are skipped; both cases used to raise
  # NoMethodError on nil.
  #
  # Returns an Array of Hashes with the keys "week", "date", "time",
  # "channel_name" and "show_name".
  def self.get_show_schedule(proxylist,herf)
    doc = get_doc_with_proxy(proxylist,herf + "/playingtime")
    container = doc && doc.css('div[id="epg"]')[0]
    return [] unless container

    rows = container.css("div[class='c1 col']").to_a
    rows.drop(1).each_with_object([]) do |row, schedule|
      time_cell = row.css('div[class="f1 fld"]')[0]
      channel_cell = row.css('div[class="f2 fld"]')[0]
      show_cell = row.css('div[class="f3 fld"]')[0]
      next unless time_cell && channel_cell && show_cell

      week, date, clock = time_cell.content.split(" ")
      schedule << {"week"=>week,"date"=>date,"time"=>clock,"channel_name"=>channel_cell.content,"show_name"=>show_cell.content}
    end
  end


  # Pick the proxies that can deliver the site's programme page fast enough.
  #
  # use_time - Integer, maximum number of seconds a proxy may take.
  #
  # Every proxy from get_proxy_list is probed once with a request to
  # "http://www.tvmao.com/program"; a proxy is kept when the request finishes
  # before the timeout.
  #
  # Changes from the previous version:
  # * uses Timeout.timeout; the bare Kernel#timeout no longer exists in
  #   current Ruby, so every probe failed;
  # * elapsed time was measured in whole seconds and had to be > 0, which
  #   rejected every proxy answering in under a second; success alone now
  #   qualifies a proxy;
  # * the hard-coded 8 second cap is gone - `use_time` is the only limit;
  # * rescues StandardError and Timeout::Error, not Exception, so Interrupt
  #   and SystemExit are no longer swallowed;
  # * the page is fetched with URI#open, which still works on Ruby 3.
  #
  # Returns an Array of "host:port" strings.
  def self.get_topfast_list(use_time)
    get_proxy_list().select do |ip_port|
      started_at = Time.now
      begin
        Timeout.timeout(use_time) do
          URI.parse("http://www.tvmao.com/program").open(:proxy=> "http://#{ip_port}") do |page|
            Nokogiri::HTML(page)
          end
        end
        time_use = Time.now - started_at
        p  "http://#{ip_port}   use_time:#{time_use}"
        true
      rescue StandardError, Timeout::Error => e
        case e
          when Errno::ETIMEDOUT, Timeout::Error
            p "Use http://#{ip_port} timeout"
          when Errno::ECONNREFUSED
            p "Use http://#{ip_port} Error connection"
          else
            p "Use http://#{ip_port} Error:#{e.to_s}"
        end
        false
      end
    end
  end

  # Scrape a list of HTTP proxies from proxycn.cn.
  #
  # The "30 fastest" page is tried first; when it yields nothing the generic
  # HTTP proxy page is used instead. Proxies on port 3128 are left out.
  #
  # Fix: rows in which the ip or the port cannot be found are skipped. The old
  # check `ip != ""` let a nil ip through, producing entries such as ":80",
  # and a missing port produced "1.2.3.4:".
  #
  # Returns an Array of "ip:port" strings.
  def self.get_proxy_list()
    list = gg('http://www.proxycn.cn/html_proxy/30fastproxy-1.html')
    list = gg('http://www.proxycn.cn/html_proxy/http-1.html') if list.count ==0

    regex_port = /(?<=<TD class="list">)[0-9]*?(?=<\/TD>)/
    regex_ip   = /(?<=a href\=whois.php\?whois\=)[0-9,.]*/
    ips_ports = []
    list.each do |proxy_txt|
      port = proxy_txt[regex_port].to_s
      ip = proxy_txt[regex_ip].to_s
      next if ip.empty? || port.empty? || port.eql?('3128')
      ips_ports << "#{ip}:#{port}"
    end
    p "Count: #{ips_ports.count}"
    ips_ports
  end

  # Download `url` and return every `<TD class="list">...</TD>` span found in
  # it. The pattern cannot cross a line break, so each match lies within one
  # line of the page.
  #
  # url - absolute URL of the page to fetch.
  #
  # Returns an Array of matched Strings.
  def self.gg(url)
    cell_pattern = /<TD class="list">.*<\/TD>/
    page_text = URI.parse(url).open do |io|
      io.each_line.inject("") { |text, line| text + line + "\n" }
    end
    page_text.scan(cell_pattern)
  end

  # Placeholder: the body is empty, so calling it does nothing and returns nil.
  # Unlike the other methods here it is an instance method, not `self.`.
  def save_img

  end

end