# frozen_string_literal: true

require 'open-uri'
require 'zlib'

module Sjunkieex

  # Scrapes serienjunkies.org for new episode download links, using a
  # series-index object to decide which series/episodes are of interest.
  class Interface

    # Default options; frozen so the shared constant cannot be mutated.
    STANDARD_CONFIG = {
      url: "http://serienjunkies.org",
      german_only: true,
      subbed_allowed: false,
    }.freeze

    attr_reader :options

    # series_index - index object; must respond to is_series_in_index? and
    #                episode_existing? (duck-typed, see usage below)
    # options      - Hash overriding entries of STANDARD_CONFIG
    def initialize(series_index, options = {})
      @options = STANDARD_CONFIG.merge(options)
      @index = series_index
    end

    # Public: Looks for new episodes on the homepage
    #
    # Returns a Hash of links for sites that should be visited
    # (keyed by href, value is the series name).
    def look_for_new_episodes
      links = {}

      doc = Nokogiri::XML(get_page_data(@options[:url]))
      doc.css("div#content > div.post > div.post-content a").each do |link|
        c = link.content

        # skip links that are not suitable
        next unless is_link_useful?(c)
        next unless @index.is_series_in_index?(c)

        series_name = Sjunkieex::SeriesIndex.extract_seriesname(c)
        next unless series_name
        next if @index.episode_existing?(series_name, c)

        # first occurrence of a href wins
        href = link[:href]
        links[href] = series_name unless links.key?(href)
      end

      links
    end

    # Public: parses a series page and extracts links
    #
    # series_name - the series name and the key in the index
    # series_link - the link to the page
    #
    # Returns a hash indexed by series identifier; each entry holds the
    # hoster links per resolution plus :episodedata and :series.
    def parse_series_page(series_name, series_link)
      link_data = {}

      doc = Nokogiri::XML(get_page_data(series_link))
      doc.css("div#content > div.post div.post-content p").each do |paragraph|
        # paragraphs carrying a class attribute are not episode entries
        next if paragraph[:class]

        episode_data = paragraph.css("strong:first-child").text
        next unless is_link_useful?(episode_data)
        next if @index.episode_existing?(series_name, episode_data)

        id = Sjunkieex::SeriesIndex.extract_episode_identifier(episode_data)
        next unless id

        # classify episode resolution; 1080p takes precedence over 720p,
        # everything else counts as SD
        resolution =
          if episode_data.match(/1080[pi]/i)
            :hd_1080p
          elsif episode_data.match(/720[pi]/i)
            :hd_720p
          else
            :sd
          end

        # extract hoster links
        episode_links = paragraph.css("a").map { |link| link[:href] }

        link_data[id] ||= {}
        link_data[id][resolution] = episode_links
        link_data[id][:episodedata] = episode_data
        link_data[id][:series] = series_name
      end

      link_data
    end

    private

    # Internal: check the link data against criterias
    #
    # link_data - data for the link
    #
    # Returns true if the link is useful or false if it can be skipped
    def is_link_useful?(link_data)
      # must contain an episode marker such as "S01E02"
      return false unless link_data.match(/S\w+E\w+/i)

      # skip links depending on language
      if @options[:german_only]
        return false unless link_data.match(/German/i)
        # optionally reject subbed releases
        return false if !@options[:subbed_allowed] && link_data.match(/Subbed/i)
      else
        return false if link_data.match(/German/i)
      end

      true
    end

    # Internal: get a page and do some stuff if the page is gzip encoded
    #
    # link - a http(s) URL, or a local file path (e.g. in tests)
    #
    # Returns the page content as a String.
    def get_page_data(link)
      # Kernel#open on URL strings was removed in Ruby 3.0, so dispatch
      # explicitly instead of relying on open-uri's Kernel patch.
      if link.to_s.match?(%r{\Ahttps?://}i)
        # block form guarantees the stream is closed, even on error
        URI.open(link) do |stream|
          if stream.content_encoding.empty?
            stream.read
          else
            # response is gzip encoded — decompress it transparently
            Zlib::GzipReader.new(stream).read
          end
        end
      else
        # local file: no HTTP metadata to inspect, read it verbatim
        File.read(link)
      end
    end
  end
end