# Parser for articles hosted on chinatimes.com, excluding the
# money.chinatimes.com subdomain (see .applicable?).
#
# Relies on `url`, `@article` and `clean_up`, which are not defined in this
# class and are presumably provided by TaiwaneseNewsParser::Parser.
class TaiwaneseNewsParser::Parser::ChinaTimes < TaiwaneseNewsParser::Parser
  # @return [String] the web domain handled by this parser
  def self.domain
    'chinatimes.com'
  end

  # @return [Array<String>] publication names associated with this parser
  def self.names
    %w{中國時報 中時電子報 工商時報 旺報 時報週刊 中天 中視 中廣 中時即時}
  end

  # @param url [String] article URL
  # @return [Boolean] true when the URL mentions chinatimes.com but not
  #   money.chinatimes.com
  def self.applicable?(url)
    url.include?('chinatimes.com') && !url.include?('money.chinatimes.com')
  end

  # Fetches the page at `url` and parses it with Nokogiri, once per instance.
  #
  # The result is memoized because #parse and its helpers call this method
  # many times; without the cache every call triggered a new download.
  # A dedicated flag is used instead of `@doc ||=` because the superclass is
  # not visible from here and might assign @doc itself.
  #
  # NOTE(review): Kernel#open only opens URLs when open-uri is loaded and the
  # Ruby version is older than 3.0, and it treats strings starting with "|"
  # as commands. Consider switching to URI.open once Ruby >= 2.5 is the
  # minimum supported version.
  #
  # @return [Nokogiri::HTML::Document]
  def doc
    return @doc if defined?(@china_times_doc_loaded)

    @raw = open(url).read
    @doc = Nokogiri::HTML(@raw)
    @china_times_doc_loaded = true
    @doc
  end

  # Example article URLs:
  #url = 'http://news.chinatimes.com/mainland/11050505/112013041400325.html'
  #url = 'http://www.chinatimes.com/realtimenews/%E6%AD%BB%E4%BA%A1%E9%9B%B2%E9%9C%84%E9%A3%9B%E8%BB%8A-%E7%BE%8E%E5%A9%A6%E5%A2%9C%E8%90%BD%E8%BA%AB%E4%BA%A1-20130720002354-260408'

  # Extracts title, company name, content, reporter name and publication
  # time from the page into @article, runs clean_up, and returns @article.
  #
  # Raises NoMethodError if the title element is absent, since the result of
  # at_css is used without a nil check.
  #
  # @return [Hash] the populated @article
  def parse
    @article[:title] = doc.at_css('.page_container header h1').text

    @article[:company_name] = parse_company_name

    @article[:content] = doc.css('.page_container article>p').text

    # Disabled legacy field, kept for reference:
    #@article[:web_published_at] = Time.parse(doc.at_css('#story_update').text)

    @article[:reporter_name] = parse_reporter_name

    @article[:published_at] = parse_published_at

    clean_up

    @article
  end

  # Reads the "YYYY年MM月DD日 HH:MM" timestamp from the `.reporter time` node.
  #
  # Returns nil instead of raising NoMethodError when the timestamp is
  # missing or in an unexpected format. Each field requires at least one
  # digit so that empty captures never reach Time.new.
  #
  # NOTE(review): Time.new without a UTC offset uses the process's local
  # time zone; confirm that this matches the site's time zone.
  #
  # @return [Time, nil]
  def parse_published_at
    stamp = doc.css('.reporter time').text
    parts = stamp.match(/(\d+)年(\d+)月(\d+)日 (\d+):(\d+)/)
    return nil unless parts

    Time.new(*parts.captures.map(&:to_i))
  end
  private :parse_published_at

  # Determines the reporter's name.
  #
  # Prefers the text of the author link; otherwise inspects the bare text
  # nodes of `.reporter` for "記者<name>/" or "【<name>/" (several slash
  # variants are accepted), and finally falls back to the whole text.
  #
  # @return [String]
  def parse_reporter_name
    author_link = doc.at_css('.reporter a[rel=author]')
    return author_link.text if author_link

    text = doc.css('.reporter>text()').text
    found = text.match(%r{記者(.+?)[/／╱／]}) || text.match(%r{【(.+?)[/／╱／]})
    found ? found[1] : text
  end

  # Determines the publishing company from the first link directly under
  # `.reporter`, mapping two section labels to their publication names.
  # Defaults to '中時電子報' when no such link exists.
  #
  # @return [String]
  def parse_company_name
    # Look the node up once instead of querying the document twice.
    link = doc.at_css('.reporter>a')
    return '中時電子報' if link.nil?

    label = link.text
    case label
    when '時週精選' then '時報週刊'
    when '新聞速報' then '中時電子報'
    else label
    end
  end

  # Replaces @article[:url] with the result of running it through
  # TaiwaneseNewsParser::UrlCleaner configured with 'id'.
  # NOTE(review): the meaning of the 'id' argument is defined by UrlCleaner,
  # which is not visible here.
  def clean_url
    cleaner = TaiwaneseNewsParser::UrlCleaner.new('id')
    @article[:url] = cleaner.clean(@article[:url])
  end

  # Derives an article identifier from a URL. Tried in order:
  #
  # 1. Legacy news.chinatimes.com URLs: the "<digits>/<digits>" path pair.
  # 2. Slug URLs ending in "-<id>-<category>": the <id> digits. The pair that
  #    terminates the path (query string and fragment ignored) is preferred,
  #    so a title slug that itself contains "-<digits>-<digits>" does not
  #    shadow the real id. If no pair terminates the path, the first
  #    "-<digits>-<digits>" anywhere in the URL is used, as before.
  # 3. Everything after "chinatimes.com/".
  #
  # @param url [String]
  # @return [String, nil] nil when none of the patterns match
  def self.parse_url_id(url)
    legacy_id = url[%r{http://news\.chinatimes\.com/\w+/(\d+/\d+)}, 1]
    return legacy_id if legacy_id

    path = url[/\A[^?#]*/]
    path[%r{-(\d+)-\d+/?\z}, 1] ||
      url[%r{-(\d+)-\d+}, 1] ||
      url[%r{chinatimes\.com/(.+)}, 1]
  end
end