require 'open-uri'
require 'zlib'
require 'nokogiri'
require 'sindex'

module Sjunkieex

  class Interface

    STANDARD_CONFIG = {
      url: "http://serienjunkies.org",
      german_only: true,
      subbed_allowed: false,
    }

    attr_reader :options

    def initialize(series_index, options = {})
      @options = STANDARD_CONFIG.merge(options)
      @index = series_index
    end

    # Public: Looks for new episodes on the homepage
    #
    # Returns a Hash of links for sites that should be visited, indexed
    # by link and pointing to the series name
    def look_for_new_episodes
      link_freq = {}

      doc = Nokogiri::XML(get_page_data(@options[:url]))
      doc.css("div#content > div.post > div.post-content a").each do |link|
        content = link.content

        # skip links that are not suitable
        next unless is_useful?(content)

        series_name = Sindex::SeriesIndex.extract_seriesname(content)
        language = get_language_from_link_data(content)
        href = link[:href]

        next if @index.episode_existing?(series_name, content, language)

        # count the occurrences of each link so that the most common one
        # can be selected per series
        link_freq[series_name] ||= {}
        link_freq[series_name][href] ||= 0
        link_freq[series_name][href] += 1
      end

      Hash[link_freq.collect { |series, v| [v.key(v.values.max), series] }]
    end
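
    # Illustrative shape of the value returned by #look_for_new_episodes
    # (the URL and series name below are made up):
    #
    #   { "http://serienjunkies.org/some-episode-post/" => "Some Series" }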
    # Public: parses a series page and extracts links
    #
    # series_name - the series name and the key in the index
    # series_link - the link to the page
    # recursive   - whether paginated follow-up pages are parsed as well
    #
    # Returns a hash indexed by episode identifier
    def parse_series_page(series_name, series_link, recursive = true)
      link_data = Hash.new

      doc = Nokogiri::XML(get_page_data(series_link))
      doc.css("div#content > div.post div.post-content p").each do |paragraph|
        next if paragraph[:class]

        episode_data = paragraph.css("strong:first-child").text
        next unless is_useful?(episode_data)

        language = get_language_from_link_data(episode_data)
        next if @index.episode_existing?(series_name, episode_data, language)

        if (id = Sindex::SeriesIndex.extract_episode_identifier(episode_data))

          # classify the episode resolution
          resolution = :sd
          resolution = :hd_720p  if episode_data.match(/720[pi]/i)
          resolution = :hd_1080p if episode_data.match(/1080[pi]/i)

          # extract hoster links
          episode_links = []
          paragraph.css("a").each do |link|
            episode_links << link[:href]
          end

          link_data[id] ||= Hash.new
          link_data[id][resolution] = episode_links
          link_data[id][:episodedata] = episode_data
          link_data[id][:series] = series_name
        end
      end

      # check if this page contains pagination (for example TBBT has more
      # than 3 pages), in which case all pages have to be parsed recursively
      data_from_other_page = {}
      if recursive && (next_page = doc.css('a.next').first)
        data_from_other_page = parse_series_page(
            series_name, next_page[:href], recursive)
      end

      link_data.merge(data_from_other_page)
    end

    private

    # Internal: check the link data against the configured criteria
    #
    # link_data - data for the link
    #
    # Returns true if the link is useful or false if it can be skipped
    def is_useful?(link_data)
      return false unless link_data.match(/S\d+E\d+/i)
      return false unless @index.is_series_in_index?(link_data)

      language = get_language_from_link_data(link_data)
      return false if language.nil?

      series_name = Sindex::SeriesIndex.extract_seriesname(link_data)
      return false unless @index.is_series_in_this_language?(series_name, language)

      unless @options[:subbed_allowed]
        return false if link_data.match(/Subbed/i)
      end

      true
    end

    # Internal: determines the language the link data is in
    #
    # data - link data
    #
    # Returns :de, :en, or nil if the data contains no episode pattern
    def get_language_from_link_data(data)
      return nil unless data.match(/S\d+E\d+/i)

      if data.match(/German/i)
        :de
      else
        :en
      end
    end

    # Internal: gets a page and decompresses it if it is gzip-encoded
    #
    # link - the link that is fetched
    #
    # Returns the page content
    def get_page_data(link)
      body = nil

      # open-uri's Kernel#open handles both local files and web URLs
      stream = open(link)
      if stream.is_a? File
        # the stream is a local file and does not have the methods below
        body = stream.read
      else
        # the stream is a web resource
        if stream.content_encoding.empty?
          body = stream.read
        else
          body = Zlib::GzipReader.new(stream).read
        end
      end

      strip_multiple_doctypes(body)
    end

    # Internal: keeps only the last document if the content contains more
    # than one DOCTYPE declaration
    #
    # content - the page content
    #
    # Returns the stripped content
    def strip_multiple_doctypes(content)
      content.split(/(?=<!DOCTYPE)/).last
    end
  end
end
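
# A minimal usage sketch (not part of the original file). The construction
# of the Sindex::SeriesIndex below is hypothetical; check the sindex gem
# for its actual API. Everything else uses only the methods defined above:
#
#   index     = Sindex::SeriesIndex.new              # hypothetical call
#   interface = Sjunkieex::Interface.new(index, subbed_allowed: true)
#
#   interface.look_for_new_episodes.each do |link, series|
#     episodes = interface.parse_series_page(series, link)
#     episodes.each do |id, data|
#       puts "#{data[:series]} #{id}: #{(data[:hd_720p] || data[:sd]).inspect}"
#     end
#   end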