#!/usr/bin/ruby -w

autoload :XMLTV, 'xmltv/xmltv'
require 'date'
require 'pp' # PP.pp is used by the error handler in TvtodayGrabber#transform

class Date
  # Formats the date as "day.month.year" (no zero padding), the form that
  # TvtodayGrabber#day_url puts into the listing URL's `date` parameter.
  def german
    "#{day}.#{month}.#{year}"
  end
end

module XMLTV
  # Grabber that scrapes per-channel day listings and per-programme detail
  # pages, caches the scraped programmes per channel, and converts them into
  # the hashes produced by `proghash`.
  #
  # Helpers such as fetch, base_url, save_object, load_cachefile, cachefile,
  # channel_list, proghash and date_stats, and the constants Vandaag and Dag,
  # are not defined in this file (presumably they come from Grabber / the
  # xmltv library -- verify there).
  class TvtodayGrabber < Grabber
    # Must implement fetch_all_channels or define channel_list as nil.
    # Must return something that respond_to? []

    # Language code of the listings.
    def lang
      'de'
    end

    # Reads the channel <select> on the base page and returns a hash mapping
    # each option value to its (stripped, UTF-converted) label. Options whose
    # value contains 'gruppeId' or is empty are skipped. The hash is also
    # persisted with save_object(channels, channel_list).
    def fetch_all_channels
      puts base_url
      url = @base_url
      page = fetch(url)
      channels = {}
      page.at('//select[@name=channel]').search('//option').each do |option|
        value = option['value']
        next if value.index('gruppeId') || value.empty?
        channels[value] = option.inner_text.strip.to_utf
      end
      save_object(channels, channel_list)
      channels
    end

    # Fetches one programme detail page and returns a hash with:
    #   'desc-s'  text of span.tv-vorspann-popup
    #   'desc'    first text node directly inside td.tv-sendung-info
    #             (absent when that cell has no text child)
    #   'info'    text of the element containing span.tv-credits (if present)
    #   'kind'    text of span.tv-auszeichnung (if present)
    # plus one `title => true` entry for every <img> in the title container.
    #
    # href:: detail URL; a value starting with '/' is prefixed with base_url.
    def grab_detail(href)
      program = {}
      # Comparison kept in this form so it works whether String#[] returns a
      # character code or a one-character string.
      href = "#{base_url}#{href}" if href[0] == ?/
      page = fetch(href)
      info = page.at('span.tv-titel-popup').parent
      program['desc-s'] = info.at('span.tv-vorspann-popup').inner_text.to_utf
      page.at('td.tv-sendung-info').each_child do |el|
        if el.is_a? Hpricot::Text
          program['desc'] = el.inner_text.to_utf
          break
        end
      end
      credits = page.at('span.tv-credits')
      program['info'] = credits.parent.inner_text.to_utf if credits
      categ = info.at('span.tv-auszeichnung')
      program['kind'] = categ.inner_text.to_utf if categ
      info.search('img').each { |img| program[img['title']] = true }
      program
    end

    # Deletes every cache entry whose 'datum' parses to a date earlier than
    # Vandaag and returns the number of entries removed.
    def clean_cache(cache)
      count = 0
      cache.delete_if do |_id, entry|
        expired = Date.parse(entry['datum']) < Vandaag
        count += 1 if expired
        expired
      end
      count
    end

    # Builds the listing URL for one channel on one day.
    def day_url(chan_id, day)
      "#{base_url}/program2007?format=genre&offset=0&date=#{day.german}&slotIndex=all&channel=#{chan_id}&order=time"
    end

    # Scrapes the listings of chan_id for today and the following 14 days.
    # Programmes not yet in the channel cache get their detail page fetched
    # and are stored in @channelhash; the cache file is rewritten only when
    # something new was fetched.
    #
    # grab_channel returns the number of programs found (cached or new).
    def grab_channel(chan_id)
      fetched = found = 0
      @channelhash = load_cachefile(chan_id)
      today = Date.today
      (today..today + 14).each do |date|
        page = fetch(day_url(chan_id, date))
        evening = false
        page.search('div#program_complete//tr').each do |row|
          time_span = row.at('span.tv-sendung-uhrzeit')
          next unless time_span
          times = time_span.inner_text
          # Track "an evening slot has been seen" for EVERY row, including
          # rows that are skipped below because they are already cached.
          # Otherwise a new after-midnight programme that follows only cached
          # evening rows would be stored with 'evening' => false and
          # #transform would put it on the wrong calendar day.
          evening = true if times.to_i > 17
          title_link = row.at("td/a.tv-sendung-titel")
          id = title_link['href'][/detailPopup\('(\d+)'/, 1]
          href = "/program2007?format=detail&sid=#{id}"
          found += 1
          next if @channelhash[id]
          begin
            @channelhash[id] = program = grab_detail(href)
            fetched += 1
          rescue
            # Dump the context for debugging, then let the error propagate.
            STDERR.puts href, page, '====='
            raise
          end
          program['times'] = times
          program['title'] = title_link.inner_text.to_utf
          program['datum'] = date.to_s
          program['evening'] = evening
        end
      end
      save_object(@channelhash, cachefile(chan_id)) if fetched > 0
      found
    end

    # transform returns an array of special hashes (obtained with proghash),
    # one per entry of @channelhash, with 'start'/'stop' times, description,
    # credits, production year and audio/subtitle/video flags filled in.
    #
    # Any StandardError is logged to STDERR together with the offending entry
    # and re-raised.
    def transform(chan_id)
      progdata_array = []
      @channelhash.each_pair do |_id, entry|
        begin
          progdata = proghash(entry, chan_id)
          date = Date.parse(entry['datum'])
          # 'times' is split on '-' and '.' into
          # start hour, start minute, stop hour, stop minute.
          start_hour, start_min, stop_hour, stop_min =
            entry['times'].split(/[-.]/).map { |x| x.to_i }
          # An early-morning start listed after the evening programmes
          # belongs to the next calendar day.
          date += 1 if start_hour < 7 && entry['evening']
          progdata['start'] = Time.local(date.year, date.month, date.day, start_hour, start_min)
          progdata['stop'] = Time.local(date.year, date.month, date.day, stop_hour, stop_min)
          # Programme runs past midnight (Dag is presumably one day in
          # seconds -- defined outside this file).
          progdata['stop'] += Dag if progdata['stop'] < progdata['start']
          date_stats(chan_id, progdata['start'])

          # 'desc' is missing when the detail page had no text node, so treat
          # nil like an empty description instead of raising NoMethodError.
          desc = entry['desc'].to_s
          short_desc = entry['desc-s'].to_s
          if desc.index("\t\t\t\t>") || (desc.empty? && !short_desc.empty?)
            progdata['desc'] = entry['desc-s']
          end

          if entry['info']
            details = entry['info'].split(';').map { |x| x.strip }
            # The role persists across segments: a segment without an
            # 'R:'/'D:' prefix continues the list of the previous role.
            role = nil
            details.each do |det|
              year = nil
              case det[0, 2]
              when 'R:'
                role = 'director'
                det = det[2..-1].strip
              when 'D:'
                role = 'actor'
                det = det[2..-1]
              when 'O:'
                year = det[-5..-1].to_i
              else
                year = det[-5..-1].to_i
              end
              if year && year > 1900
                progdata['date'] = year.to_s
              elsif role
                # Drop parenthesised remarks before storing the name.
                content = det.gsub(/\(.*\)/, '').strip
                unless content.empty?
                  (progdata['credits'][role] ||= []) << content
                end
              end
            end
          end

          # These flags come from the image titles collected by grab_detail
          # and do not depend on a credits line, so they are applied whether
          # or not 'info' is present.
          progdata['audio']['stereo'] = 'stereo' if entry['Stereo']
          progdata['subtitles']['type'] = 'teletext' if entry['Untertitel']
          progdata['video']['aspect'] = '16:9' if entry['Breitbild']

          progdata_array << progdata
        rescue StandardError => exc
          STDERR.puts exc, exc.message, exc.backtrace
          PP.pp(entry, STDERR)
          raise
        end
      end
      progdata_array
    end
  end
end

XMLTV::TvtodayGrabber.new.run