#encoding:utf-8

require 'nokogiri'
require 'open-uri'
require 'timeout'

require File.expand_path("../grabepg/grab_base.rb", __FILE__)
require File.expand_path("../grabepg/grab_tvsou.rb", __FILE__)

module Grabepg
  class GrabTvmao
  # To change this template use File | Settings | File Templates.


  # Fetching an image: Net::HTTP.get(url)
  # Determining an image's file type: (note left unfinished by the author)

  attr_reader :channel  # channel list (filled by getchannels)
  attr_reader :site # base address of the site
  # NOTE(review): the code below keeps the proxy index in @@proxyindex, not
  # @proxyindex, so this reader presumably always returns nil -- confirm.
  attr_reader :proxyindex # index into the proxy list
  attr_reader :show_schedule # schedules keyed by show
  attr_reader :img_down_path # where image download paths are stored

  # Channel groups whose listing pages getchannels walks.
  DEFAULT_GrabtvType=["cctv","satellite","digital",]
  # Site root used when building page URLs.
  DEFAULT_SITE = "http://www.tvmao.com"


  # Create the GrabBase helper and reset the class-level proxy index
  # (@@proxyindex, shared by every instance) to 0.
  def initialize
    @grabbase = GrabBase.new
    @@proxyindex = 0
  end


  # Enrich a batch of schedule entries with show types and logos from tvmao.
  #
  # channel   - channel the schedule belongs to
  # url       - channel page address the day URL is derived from
  # date      - day label, e.g. "星期三(05-12)"
  # schedule  - array of entry hashes to enrich (the hashes are updated in place)
  # proxylist - proxies handed to the page fetcher
  #
  # Entries are matched on their "schedule_start" time. Returns the entries of
  # +schedule+, one per distinct start time.
  def get_show_type_by_batch(channel,url,date,schedule,proxylist)
    by_start = {}
    schedule.each do |entry|
      by_start[entry["schedule_start"].gsub(":","").to_i] = entry
    end

    day_url = get_show_type_url(url,date)
    get_schedulelist_atday(channel,day_url,proxylist).each do |grabbed|
      begin
        target = by_start[grabbed["schedule_start"].gsub(":","").to_i]
        next unless target

        if grabbed["type"]
          # Array() guards against a nil type: `nil | [...]` is NilClass#| and
          # would store a boolean instead of the merged list.
          target["type"] = Array(target["type"]) | Array(grabbed["type"])
        end

        if target["schedule_logo"] == ""
          # get_schedulelist_atday stores the image under "schedule_logo";
          # "img" is still honoured for entries coming from elsewhere.
          logo = [grabbed["img"], grabbed["schedule_logo"]].find { |v| v && v != "" }
          target["schedule_logo"] = logo if logo
        end
      rescue
        # Best effort: a malformed grabbed entry must not abort the batch.
        next
      end
    end

    by_start.values
  end

  # Look up the type list of the show that starts at a given time.
  #
  # channel   - channel the schedule belongs to
  # url       - channel page address the day URL is derived from
  # date      - day label, e.g. "星期三(05-12)"
  # time      - start time of the show, e.g. "09:05"
  # proxylist - proxies handed to the page fetcher
  #
  # Returns the "type" of the last entry whose start time matches, or [] when
  # nothing matches (or the match carries no type).
  def get_show_type(channel,url,date,time,proxylist)
    day_url = get_show_type_url(url,date)
    wanted = time.gsub(":","").to_i
    hits = get_schedulelist_atday(channel,day_url,proxylist).select do |entry|
      entry["schedule_start"].gsub(":","").to_i == wanted
    end
    winner = hits.last
    (winner && winner["type"]) || []
  end

  # Build the page URL for one weekday of a channel.
  #
  # url  - channel path; its first two "-"-separated pieces are kept
  # date - day label whose part before "(" is a Chinese weekday name
  #
  # Monday..Sunday map to w1..w7; an unrecognised label yields w0.
  def get_show_type_url(url,date)
    day_names = ["星期一","星期二","星期三","星期四","星期五","星期六","星期日"]
    position = day_names.index(date.split("(")[0])
    day_number = position ? position + 1 : 0

    pieces = url.split("-")
    "http://www.tvmao.com#{pieces[0]}-#{pieces[1]}-w#{day_number}.html"
  end


# Convert a weekday number (such as Time#wday) into its Chinese name.
# Both 0 and 7 mean Sunday; a number outside 0..7 yields the bare prefix "星期".
  def conversion_what_day(whatday)
    suffixes = ["日","一","二","三","四","五","六","日"]
    number = whatday.to_i
    return "星期" unless number >= 0 && number <= 7
    "星期" + suffixes[number]
  end

  # Render a number as text, prefixing a single "0" when the text is shorter
  # than two characters (so 1..9 become "01".."09").
  def dispose_time(num)
    text = num.to_s
    text.length < 2 ? "0" + text : text
  end

  # Format a Time as "<weekday name>(<MM>-<DD>)", the day-label format used
  # throughout this class.
  def get_week_date_time(time)
    day_name = conversion_what_day(time.wday)
    "#{day_name}(#{dispose_time(time.month)}-#{dispose_time(time.day)})"
  end

  # Number of seconds spanned by +day_num+ days (the amount to subtract from a
  # Time to step back that many days).
  def del_day_num(day_num)
    [60, 60, 24].inject(day_num) { |total, factor| total * factor }
  end

  # Day label for the date +num+ days before now.
  def get_time_day_prior(num)
    get_week_date_time(Time.now - del_day_num(num))
  end

  # Day labels of the previous week that should be deleted.
  # Only produced on a Monday (labels for today and the 7 days before it);
  # on any other day the list is empty.
  def del_time_list
    return [] unless Time.now.wday == 1
    (0..7).map { |days_back| get_time_day_prior(days_back) }
  end


  # Example driver showing how the grabber is meant to be called.
  #
  # Fetches the channel list, picks proxies that answer within 5 seconds, then
  # grabs today's schedule for every channel from "CCTV16" onwards (in the
  # order the channel list yields them). Returns the channel-url hash.
  #
  # The helpers are instance methods, so they are invoked on self; calling
  # them on the GrabTvmao class raises NoMethodError.
  def start
    path = "/home/zql/workspace/New/smart_remote/img_path"
    channel_list = getchannels(path)
    channel_urls = channel_list['channel_urls']
    p "Channel img save file,path='#{img_down_path}'"
    proxy_list = get_topfast_list(5)  # argument: slowest acceptable response time, in seconds

    # Use for Test
    p "************************************"
    p "proxy_list:#{proxy_list}"
    p "************************************"

    bool_start = false
    channel_urls.each do |channel,url|
      bool_start = true if channel=="CCTV16"
      next unless bool_start

      # getScheduleAssignDate arguments:
      #   channel    channel id
      #   herf       channel page address
      #   proxylist  proxy list
      #   start_num  offset of the first day from today (positive: days after
      #              today, negative: days before today)
      #   day_num    number of days to grab
      #   img_dir_down_path  optional directory for the image index file
      start_time = 0
      use_num = 1
      getScheduleAssignDate(channel,url,proxy_list,start_time,use_num)
    end
  end

  # Explicit reader for @img_down_path (same result as the attr_reader of the
  # same name declared at the top of the class).
  # NOTE(review): nothing visible in this file assigns @img_down_path --
  # getchannels stores its directory in @img_down_dir_path -- so this
  # presumably returns nil; confirm which variable was intended.
  def img_down_path
    @img_down_path
  end


  # Fetch the site's channel list and record each channel's logo address.
  #
  # img_dir_path - directory in which the "channel_img_down_path" index file is
  #                (re)written, one "<channel_id>:<logo url>" line per channel
  #
  # Side effects: sets @site, @img_down_dir_path and fills @channel.
  # Returns {"channel_info" => {id => details}, "channel_urls" => {id => href}}.
  def getchannels(img_dir_path)
    @channel = []
    @site = DEFAULT_SITE
    @img_down_dir_path = img_dir_path

    channel_urls = {}
    channel_info = {}

    # Block form closes the index file even when a page fetch raises.
    File.open(File.join(img_dir_path,"channel_img_down_path"),'w+') do |index_file|
      @img_down_file = index_file

      DEFAULT_GrabtvType.each do |type|
        url = "#{@site}/program/duration/#{type}/w1.html"
        p url
        # URI#open (from open-uri) rather than Kernel#open, which no longer
        # accepts URLs on Ruby 3; the block also closes the connection.
        doc = URI.parse(url).open { |page| Nokogiri::HTML(page) }

        doc.css('td[class="tdchn"]').each do |td|
          channel_name = td.content
          herf = ""
          td.css('a').each { |a| herf = a['href'] }

          # The id is the second "-"-separated piece of the third path segment.
          # Rows whose link does not have that shape are skipped instead of
          # being stored under a nil id.
          segment = herf.to_s.split("/")[2]
          channel_id = segment && segment.split("-")[1]
          next if channel_id.nil?

          # Address of the channel logo.
          img_path = "http://static.haotv.me/channel/logo/#{channel_id}.jpg"
          index_file.puts("#{channel_id}:#{img_path}")
          @channel<<({channel_id=>{name:channel_name,herf:herf,type:type}})
          channel_info[channel_id] = {"channel_name"=>channel_name,"channel_type"=>type,"channel_id"=>channel_id,"img_path"=>img_path}
          channel_urls[channel_id] = herf
        end
      end
    end

    p "Channel: #{@channel}"
    {"channel_info"=>channel_info,"channel_urls"=>channel_urls}
  end


    # Recover from a failed proxied fetch: advance to the next proxy and retry.
    #
    # proxy     - the proxy that failed (may be nil or empty)
    # proxylist - list of proxies; an unusable (nil/empty) entry is removed
    # url       - page being fetched
    # err       - description of the failure, used in the log line and the error
    #
    # Returns the document from the retry. Raises RuntimeError once the retry
    # counter (@no_firest) reaches 10.
    def err_doc_proxy(proxy,proxylist,url="",err="")
      # nil must be tested first (nil has no #empty?), and delete_at takes the
      # index as an argument.
      if proxy.nil? || proxy.empty?
        proxylist.delete_at(@@proxyindex)
      end

      @no_firest ||= 0
      @no_firest += 1
      p "*************************Proxy:#{proxy}, url:#{url} Error:#{err}"
      # The failing proxy is deliberately kept in the list: the failure may be
      # caused by the page rather than the proxy.
      @@proxyindex += 1
      @@proxyindex %= @size if @size && @size > 0

      if @no_firest < 10
        get_doc_with_proxy(proxylist,url)
      else
        @no_firest = 0
        raise RuntimeError,"Error: #{err}"
      end
    end


    # Fetch +url+ and parse it into a Nokogiri HTML document, going through a
    # proxy picked from +proxylist+ (directly when the list is nil or empty).
    #
    # Proxy entries may be full URIs or bare "host:port" strings; the latter
    # get an "http://" prefix because open-uri only accepts HTTP proxy URIs.
    # Failures are handed to err_doc_proxy, which retries with another proxy
    # and eventually raises RuntimeError.
    def get_doc_with_proxy(proxylist,url)
      if proxylist.nil? || proxylist.empty?
        begin
          # URI#open (open-uri) replaces Kernel#open, which no longer accepts
          # URLs on Ruby 3.
          return Nokogiri::HTML(URI.parse(url).open { |page| page.read })
        rescue => err
          p "Error : Proxy:, url:#{url}"
          raise RuntimeError,"Error: #{err.to_s} Method:get_doc_with_proxy"
        end
      end

      @@proxyindex = 0 unless defined?(@@proxyindex) && @@proxyindex
      @size = proxylist.size
      # Mixing in the clock spreads requests over the proxies.
      @@proxyindex = (@@proxyindex + Time.now.to_i + 1) % @size
      proxy = proxylist[@@proxyindex]
      unless proxy
        # Skip a nil slot, wrapping around instead of running off the end.
        @@proxyindex = (@@proxyindex + 1) % @size
        proxy = proxylist[@@proxyindex]
      end

      begin
        doc = nil
        unless proxy.nil? || proxy.empty?
          proxy_uri = proxy.to_s =~ %r{\A[a-zA-Z][a-zA-Z0-9+.\-]*://} ? proxy.to_s : "http://#{proxy}"
          doc = Nokogiri::HTML(URI.parse(url).open(:proxy => proxy_uri) { |page| page.read })
        end
        if doc.nil?
          p "DOC is nil"
          doc = err_doc_proxy(proxy,proxylist,url,"doc nil")
        end
        @no_firest = 0
      rescue => err
        p "IN Rescue"
        doc = err_doc_proxy(proxy,proxylist,url,err.to_s)
        @no_firest = 0
        p "Get DOC"
        @@proxyindex = (@@proxyindex + Time.now.to_i + 1) % @size
        return doc
      end
      doc
    end

  # Grab the schedule of one channel for one day.
  #
  # channel   - channel id (used for the fallback logo address)
  # url       - address of the day page
  # proxylist - proxies handed to the page fetcher
  #
  # Sets @date to "<week>(<date>)" as read from the page header. Returns an
  # array of hashes with the keys "schedule_name", "schedule_logo",
  # "schedule_start", "show_infomation_herf", "type" and "name".
  def get_schedulelist_atday(channel,url,proxylist)
    p "Grab: #{url}"
    doc = get_doc_with_proxy(proxylist,url)

    data = doc.css('div[class="mt10 clear"]')[0].content.split(" ")
    date = data[0]
    week = data[1]
    p "Channel: #{channel}  Date: #{date} Week: #{week}"
    @date = "#{week}(#{date})"

    # Channel logo: the one shown on the page, else the static default.
    img_url = "http://static.haotv.me/channel/logo/" + channel + ".jpg"
    logo = doc.css("h1[style='float:left']").xpath('img[@src]')[0]
    img_url = logo.get_attribute("src") if logo
    p "**************IMG: #{img_url}"

    schedule_list = []
    doc.css('ul[id="pgrow"]')[0].css("li").each do |item|
      link = item.xpath('a[@href]')[0]
      if link
        schedule_herf = link.get_attribute("href")
      else
        # Drama rows link to an episode page; keep the part before
        # "/episode/section", which is the show page.
        drama = item.css('a[class="drama"]')[0]
        schedule_herf = drama.get_attribute("href").to_s.gsub("/episode/section","#%#").split("#%#")[0] if drama
      end

      fields = item.content.split(" ")
      next unless fields.size > 1
      time = fields[0]
      title = fields[1]

      # Reset per entry so a row without a detail link does not inherit the
      # previous show's type.
      show_type = []
      show_name = ""
      show_img = nil
      unless schedule_herf.nil? || schedule_herf.empty?
        p "Show_infomation:#{schedule_herf} Time:#{time}"
        show_infomation = get_show_infomation(proxylist,schedule_herf)
        # Anything but a Hash means the lookup failed; keep the defaults.
        if show_infomation.is_a?(Hash)
          show_type = show_infomation["type"]
          show_name = show_infomation["name"]
          show_img = show_infomation["img"]
        end
      end
      p "Time: #{time} schedule: #{title} show_infomation_herf: #{schedule_herf}  type: #{show_type} name: #{show_name} img:#{show_img}"
      schedule_list << {"schedule_name"=>title,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
    end
    schedule_list
  end

  # Build the day-page URLs for a run of days relative to today.
  #
  # url        - channel path; its first two "-"-separated pieces are kept
  # start_time - Integer offset of the first day from today (positive: after
  #              today, negative: before today)
  # day_num    - Integer number of days to cover, counted from the first day
  #
  # Day numbers are today's wday plus the offset; a negative first day becomes
  # 1 and the last day is capped at today's wday + 7. Returns an array of URLs.
  def get_assign_date_url(url,start_time,day_num)
    base = @site ? @site : "http://www.tvmao.com"

    today = Time.now.wday
    first_day = today + start_time
    first_day = 1 if first_day < 0

    last_day = first_day + day_num - 1
    latest_allowed = today + 7
    last_day = latest_allowed if last_day > latest_allowed

    pieces = url.split("-")
    prefix = "#{base}#{pieces[0]}-#{pieces[1]}-"
    (first_day..last_day).map { |day| "#{prefix}w#{day}.html" }
  end


  # Grab the schedule of a channel for a given run of days.
  #
  # channel   - channel id
  # herf      - channel page address
  # proxylist - proxies handed to the page fetcher
  # start_num - offset of the first day from today (see get_assign_date_url)
  # day_num   - number of days to grab; anything that is not a number >= 1
  #             is treated as 1
  # img_dir_down_path - directory for the "schedule_img_down_path" file;
  #             defaults to the directory given to getchannels, else the
  #             directory containing this source file
  #
  # Returns {"channel_schedule" => {day label => entries},
  #          "show_schedule" => @show_schedule}.
  def getScheduleAssignDate(channel,herf,proxylist,start_num,day_num=0,img_dir_down_path=@img_down_dir_path)
    day_num = 1 unless day_num.is_a?(Numeric) && day_num >= 1
    # __FILE__ itself is a file, not a directory, so join against its folder.
    img_dir_down_path ||= File.dirname(__FILE__)

    @show_schedule = {}
    channel_schedule = {}

    img_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
    @img_down_file = img_file
    begin
      get_assign_date_url(herf,start_num,day_num).each do |url|
        # get_schedulelist_atday reports the day it parsed through @date.
        @date = ""
        schedule_list = get_schedulelist_atday(channel,url,proxylist)
        channel_schedule[@date] = schedule_list unless @date.empty?
      end
    ensure
      # Close the handle even when a page fetch raises.
      img_file.close
    end

    {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
  end


  # Kept because existing callers still use it.
  # Grab the schedule of a channel for the first +day_num+ days of the week
  # (pages w1..w<day_num>).
  #
  # channel   - channel id
  # herf      - channel page address; its first two "-"-separated pieces are kept
  # proxylist - proxies handed to the page fetcher
  # day_num   - number of days to grab; anything that is not a number >= 1
  #             is treated as 1
  # img_dir_down_path - directory for the "schedule_img_down_path" file;
  #             defaults to the directory given to getchannels, else the
  #             directory containing this source file
  #
  # Returns {"channel_schedule" => {day label => entries},
  #          "show_schedule" => @show_schedule}.
  def getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
    p "Day Num is #{day_num}"
    day_num = 1 unless day_num.is_a?(Numeric) && day_num >= 1
    # __FILE__ itself is a file, not a directory, so join against its folder.
    img_dir_down_path ||= File.dirname(__FILE__)
    site = @site || "http://www.tvmao.com"

    @show_schedule = {}
    channel_schedule = {}

    pieces = herf.split("-")
    prefix = "#{site}#{pieces[0]}-#{pieces[1]}-"
    week_urls = 1.upto(day_num).map { |day| "#{prefix}w#{day}.html" }

    img_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
    @img_down_file = img_file
    begin
      week_urls.each do |url|
        # get_schedulelist_atday reports the day it parsed through @date.
        @date = ""
        schedule_list = get_schedulelist_atday(channel,url,proxylist)
        channel_schedule[@date] = schedule_list unless @date.empty?
      end
    ensure
      # Close the handle even when a page fetch raises.
      img_file.close
    end

    {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
  end


  # Fetch the details of one show.
  #
  # proxy_list    - proxies handed to the page fetcher
  # schedule_herf - show path, appended to the site address
  #
  # Genres are collected from the show page and its "/detail" page.
  # Returns {"type" => [genres], "name" => show name, "img" => poster address}.
  # On any failure the error is printed and a hash of the same shape with
  # empty values is returned, so callers can always index the result.
  def get_show_infomation(proxy_list,schedule_herf)
    @site ||= "http://www.tvmao.com"
    show_url = @site + schedule_herf
    doc = get_doc_with_proxy(proxy_list,show_url)

    name_node = doc.css('span[itemprop="name"]')[0]
    name = name_node ? name_node.content : ""

    # Poster image of the show, when the page has one.
    poster = doc.css('img[class="tvc"]')[0]
    schedule_img_down_path = poster.get_attribute('src') if poster

    type = []
    doc.css('span[itemprop="genre"]').each { |genre| type << genre.content }
    doc.css('a[itemprop="genre"]').each { |genre| type << genre.content }

    detail = get_doc_with_proxy(proxy_list,"#{show_url}/detail")
    detail.css('span[itemprop="genre"]').each { |genre| type << genre.content } if detail
    type.uniq!

    @show_schedule ||= {}
    {"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
  rescue => e
    p "Error In get_show_infomation msg : #{e.to_s}"
    {"type"=>[],"name"=>"","img"=>nil}
  end

  # Grab the airing times of a show from its "/playingtime" page.
  #
  # proxylist - proxies handed to the page fetcher
  # herf      - address of the show page
  #
  # The first row of the listing is skipped. Returns an array of hashes with
  # the keys "week", "date", "time", "channel_name" and "show_name"; empty when
  # the page has no listing.
  def get_show_schedule(proxylist,herf)
    doc = get_doc_with_proxy(proxylist,herf + "/playingtime")
    listing = doc.css('div[id="epg"]')[0]
    return [] unless listing

    airings = []
    listing.css("div[class='c1 col']").each_with_index do |row,position|
      next if position == 0
      week, date, clock = row.css('div[class="f1 fld"]')[0].content.split(" ")
      airings << {
        "week"=>week,
        "date"=>date,
        "time"=>clock,
        "channel_name"=>row.css('div[class="f2 fld"]')[0].content,
        "show_name"=>row.css('div[class="f3 fld"]')[0].content
      }
    end
    airings
  end


  # Pick the proxies that can load the site quickly enough.
  #
  # use_time - slowest acceptable response time, in seconds (Integer)
  #
  # Each candidate from get_proxy_list is used to load the program page under
  # a +use_time+ timeout. Returns the "ip:port" strings of the candidates that
  # answered in time, in their original order.
  def get_topfast_list(use_time)
    fast_list = []
    get_proxy_list.each do |ip_port|
      started = Time.now
      begin
        Timeout.timeout(use_time) do
          # URI#open (open-uri) replaces Kernel#open, which no longer accepts
          # URLs on Ruby 3.
          URI.parse("http://www.tvmao.com/program").open(:proxy => "http://#{ip_port}") do |page|
            Nokogiri::HTML(page)
          end
        end
        # Reaching this point means the page loaded within use_time. Elapsed
        # time is kept as a float so sub-second proxies are not mistaken for
        # failures.
        elapsed = Time.now - started
        p "http://#{ip_port}   use_time:#{elapsed.round(2)}"
        fast_list << ip_port
      rescue Errno::ETIMEDOUT, Timeout::Error
        p "Use http://#{ip_port} timeout"
      rescue Errno::ECONNREFUSED
        p "Use http://#{ip_port} Error connection"
      rescue StandardError => e
        p "Use http://#{ip_port} Error:#{e.to_s}"
      end
    end
    fast_list
  end

  # Scrape a list of candidate proxies.
  #
  # Reads the "fast proxy" listing first and falls back to the general HTTP
  # listing when that yields nothing. Rows without an address or a port are
  # dropped, as are proxies on port 3128.
  #
  # Returns an array of "ip:port" strings.
  def get_proxy_list()
    list = gg('http://www.proxycn.cn/html_proxy/30fastproxy-1.html')
    list = gg('http://www.proxycn.cn/html_proxy/http-1.html') if list.count == 0

    regex_port = /(?<=<TD class="list">)[0-9]*?(?=<\/TD>)/
    regex_ip   = /(?<=a href\=whois.php\?whois\=)[0-9,.]*/

    ips_ports = []
    list.each do |proxy_txt|
      # String#[] returns nil when the pattern does not match; to_s folds that
      # into the empty case so half-parsed rows never produce ":port" or "ip:".
      port = proxy_txt[regex_port].to_s
      ip = proxy_txt[regex_ip].to_s
      next if ip.empty? || port.empty? || port == '3128'
      ips_ports << "#{ip}:#{port}"
    end
    p "Count: #{ips_ports.count}"
    ips_ports
  end

  # Download +url+ and return every stretch of a line that runs from the first
  # <TD class="list"> cell to the last closing </TD> on that line.
  def gg(url)
    cell_pattern = /<TD class="list">.*<\/TD>/
    page_text = ""
    URI.parse(url).open do |stream|
      stream.each_line { |row| page_text << row << "\n" }
    end
    page_text.scan(cell_pattern)
  end

  # Placeholder: the body is empty, so calling it does nothing and returns nil.
  def save_img

  end
 end
end