#!/usr/bin/ruby -w
#
# XMLTV grabber for the UPC Live TV guide (www.upclive.nl).
#
# The XMLTV::Grabber base class and helpers such as fetch, save, save_object,
# load_cachefile, cachefile, channel_list, all_channels, proghash, date_stats,
# reject and myname are not defined in this file; they are expected to come
# from the autoloaded 'xmltv/xmltv' library. The same holds for Date.dutch,
# Date::Dagen and the Dag constant.

autoload :XMLTV, 'xmltv/xmltv'
require 'date'
require 'uri'

class Date
  # Today's date, captured once when this file is loaded.
  Vandaag = Date.today

  # Converts a day label used by this grabber into a Date.
  #
  # 'today' maps to Vandaag. Any other label is handed to Date.dutch
  # (defined elsewhere); when that resolves to today's date the result is
  # moved one week ahead, because today is already covered by 'today'.
  def self.upcdate(string)
    return Vandaag if string == 'today'

    rsl = dutch(string)
    rsl += 7 if rsl == Vandaag
    rsl
  end
end

module XMLTV
  # Grabber that scrapes channel lists and per-day programme listings from
  # the UPC Live web site.
  class UpcGrabber < Grabber
    # Day labels appended to a channel URL: 'today' followed by the
    # lower-cased English weekday names from Date::DAYNAMES.
    Days_to_grab = %w{ today } + Date::DAYNAMES.map { |x| x.downcase }

    # Site category label (whitespace removed, lower-cased) => category name
    # written to the output. Labels missing here become 'onbekend'.
    Cattrans = {
      'drama'         => 'Drama',
      'educatie'      => 'Educational',
      'erotiek'       => 'Adult',
      'kids/jeugd'    => 'Children',
      'kunst/cultuur' => 'Arts/Culture',
      'nieuws'        => 'News',
      'show/spelshow' => 'Game',
      'speelfilm'     => 'Movies',
      'sport'         => 'Sports',
      'vrijetijd'     => 'Educational'
    }

    # Root URL that every page URL of this grabber is built from.
    def base_url
      'http://www.upclive.nl'
    end

    # Walks the paginated channel overview (at most 200 pages) and builds a
    # hash of channel number => [name, packet, url]. Scanning stops at the
    # first channel box that has no 'h5 a' link. The hash is stored with
    # save_object under channel_list and also returned.
    def fetch_all_channels
      channels = {}
      catch(:ready) do
        1.upto(200) do |pagenr|
          www = "#{base_url}/Televisie/TV_gids/Zenders/?channels=All&theme=All&page=#{pagenr}"
          STDERR.puts(www) if XmltvOptions.verbose
          page = fetch(www)
          chns = page.search('div.epg_listings_bar4c') + page.search('div.epg_listings_bar4cend')
          chns.each do |chn|
            link = chn.at('h5 a')
            throw :ready if link.nil?

            url = REXML::Text.read_with_substitution(link['href']).gsub('nowandnext', '')
            name = upc_unescape(url[/channels=(.*)&/, 1])
            tekst = chn.at('div.right')
            zender = tekst.inner_text[/Zendernummer\s+([0-9]+)/, 1]
            # Last word of the package link text; a bare 'Pack' is stored
            # as an empty package name.
            packet = tekst.at('a.epg_underline').inner_text.split[-1]
            packet = '' if packet == 'Pack'
            channels[zender] = [name, packet, url]
          end
        end
      end
      save_object(channels, channel_list)
      channels
    end

    # Removes every cache entry whose key parses to a date before today and
    # returns the number of entries removed. The cache is modified in place.
    def clean_cache(cache)
      count = 0
      cache.delete_if do |dt, _programs|
        # Qualified explicitly: the only Vandaag visible in this file is the
        # one defined on Date above.
        stale = Date.parse(dt) < Date::Vandaag
        count += 1 if stale
        stale
      end
      count
    end

    # First element of the all_channels entry for chan_id (the channel name
    # in the layout written by fetch_all_channels).
    def channel_display(chan_id)
      all_channels[chan_id][0]
    end

    # Listing URL for one channel and one day label from Days_to_grab.
    def day_url(chan_id, day)
      "#{base_url}#{all_channels[chan_id][2]}#{day}_all"
    end

    # Fetches every day in Days_to_grab that is not yet in the channel's
    # cache, stores the parsed programmes in @all_days keyed by the date
    # shown on the page, saves the cache and returns the number of
    # programmes parsed during this call.
    #
    # Raises RuntimeError when the 'today' page reports a date other than
    # today; re-raises NoMethodError (after saving the page) when the date
    # cannot be read from the page.
    def grab_channel(chan_id)
      @all_days = load_cachefile(chan_id)
      @all_days.delete(Date.today.to_s) ## Always fetch today to calibrate calendars
      progs = 0
      Days_to_grab.each_with_index do |day, index|
        # Index 0 is 'today'; index i > 0 is translated through
        # Date::Dagen[i - 1] (defined elsewhere; presumably the Dutch weekday
        # names in the same order as Date::DAYNAMES -- TODO confirm).
        date = Date.upcdate(index == 0 ? day : Date::Dagen[index - 1])
        next if @all_days.has_key?(date.to_s)

        url = day_url(chan_id, day)
        page = fetch(url)
        datum = upc_listing_date(url, page)
        if day == 'today' && datum != Date.today
          # The cache is deliberately left untouched here.
          raise "#{day} #{datum} Upc is gek geworden"
        end

        programs = upc_parse_programs(page)
        @all_days[datum.to_s] = programs
        progs += programs.size
      end
      save_object(@all_days, cachefile(chan_id))
      progs
    end

    # Converts the entries held in @all_days into an array of programme
    # hashes with 'start', 'stop' and 'category' filled in. Reads @all_days
    # as left behind by grab_channel; it does not reload the cache itself.
    def transform(chan_id)
      progdata_array = []
      @all_days.each_pair do |datum, programs|
        jaar, maand, dag = datum.split('-').map { |x| x.to_i }
        programs.each do |entry|
          progdata = proghash(entry, chan_id)
          startuur, startmin, stopuur, stopmin = entry['times'].scan(/\d+/).map { |x| x.to_i }
          # Shift early-hour starts by Dag unless the entry sits under the
          # 'ochtend' header. The '== false' comparison is intentional: a nil
          # noshift (no header seen yet) must NOT trigger the shift.
          # Dag is defined elsewhere; presumably one day in seconds -- confirm.
          shift = startuur < 8 && entry['noshift'] == false ? Dag : 0
          progdata['start'] = start = Time.local(jaar, maand, dag, startuur, startmin) + shift
          progdata['stop'] = stop = Time.local(jaar, maand, dag, stopuur, stopmin)
          # A stop time that precedes the start and falls at or before
          # 10 o'clock is moved forward by Dag.
          progdata['stop'] += Dag if stop < start && stopuur <= 10

          # NOTE(review): date_stats is called both here and again after the
          # reject check with identical arguments. This looks like an
          # accidental duplicate; confirm against date_stats before removing
          # either call. Both are kept to preserve current behaviour.
          date_stats(chan_id, progdata['start'])
          if progdata['stop'] < progdata['start']
            reject(myname, day_url(chan_id, 'today'), entry, progdata)
            next
          end
          date_stats(chan_id, progdata['start'])
          progdata['category'] = Cattrans[entry['category'].gsub(/\s+/, '').downcase] || 'onbekend'
          progdata_array << progdata
        end
      end
      progdata_array
    end

    private

    # Percent-decodes text. URI.unescape was removed in Ruby 3.0, so the
    # parser object is used whenever it exists; older interpreters without
    # URI::DEFAULT_PARSER fall back to URI.unescape.
    def upc_unescape(text)
      if defined?(URI::DEFAULT_PARSER)
        URI::DEFAULT_PARSER.unescape(text)
      else
        URI.unescape(text)
      end
    end

    # Reads the date shown in the listing header of page via Date.dutch.
    # When that fails with NoMethodError the page is handed to save together
    # with its URL, and the error is re-raised.
    def upc_listing_date(url, page)
      Date.dutch(page.at('//div.epg_listings_bar1//span').inner_text)
    rescue NoMethodError
      save(url, page)
      raise
    end

    # Builds one hash per 'listing_visible' div of the listing block.
    # 'noshift' records whether the most recent 'epg_header_4' header read
    # 'ochtend': true or false once a header was seen, nil before that.
    def upc_parse_programs(page)
      noshift = nil
      programs = []
      page.at('div.ch_ci2_epg_center').search('/div').each do |programme|
        case programme['class']
        when 'epg_header_4'
          noshift = (programme.inner_text.strip.downcase == 'ochtend')
        when 'listing_visible'
          programs << {
            'noshift'  => noshift,
            'title'    => programme.at('div.col7').inner_text.strip,
            'times'    => programme.at('div.col8').inner_text.gsub(/\s/, ''),
            'category' => programme.at('div.col9').inner_text.strip,
            'desc'     => upc_description(programme)
          }
        end
      end
      programs
    end

    # Concatenates the stripped leading text nodes of the programme's
    # 'div.info_color' element, stopping at the first non-text child.
    def upc_description(programme)
      desc = ''
      programme.at('div.info_color').each_child do |node|
        break if node.class != Hpricot::Text
        desc << node.to_s.strip
      end
      desc
    end
  end
end

XMLTV::UpcGrabber.new.run