#!/usr/bin/ruby -w

require 'rubygems'
require 'hpricot'
require 'rexml/document'
require 'yaml'
require 'date'
require 'fileutils'
require 'timeout'
require 'open-uri'
require 'pp'
require 'iconv'
require 'set'
require 'optparse'
require 'optparse/time'
require 'ostruct'

# Make Time#to_date callable from outside (used by Grabber#date_stats).
class Time
  public :to_date
end

# Minimal mail delivery through the local sendmail binary.
module MailSender
  # Builds a To/From/Subject header plus body, echoes the message to
  # stdout and pipes it to "/usr/sbin/sendmail -t".
  # The sender is the current user name as reported by /usr/bin/id.
  def linuxmail(to, subj, body)
    from = `/usr/bin/id --name --user`.chomp
    header = ["To: #{to}", "From: #{from}", "Subject: #{subj}"].join("\n")
    mess = [header, "", body].join("\n")
    puts mess
    IO.popen("/usr/sbin/sendmail -t", 'w') do |h|
      h.puts mess
    end
  end

  # Sends a mail via linuxmail when /usr/sbin/sendmail is executable;
  # raises a RuntimeError otherwise.
  def sendmail(*args)
    if test(?x, '/usr/sbin/sendmail')
      linuxmail(*args)
    else
      raise "FIXME: Don't know how to send mail"
    end
  end
  module_function :sendmail, :linuxmail
end

# Character-set helpers built on two shared Iconv converters.
class String
  UTF = 'utf-8'
  ISO = 'iso-8859-15'
  TO_UTF = Iconv.new(UTF, ISO)
  CK_UTF = Iconv.new(UTF, UTF)

  # Converts the receiver from ISO-8859-15 to UTF-8.
  def to_utf
    TO_UTF.iconv(self)
  end

  # Runs the receiver through a UTF-8 to UTF-8 conversion.
  def ck_utf
    CK_UTF.iconv(self)
  end
end

class Date
  # Dutch month names; index 0 is nil so that the index equals the month number.
  Maanden = [nil] + %w{ januari februari maart april mei juni juli augustus september oktober november december }
  # Dutch weekday names, indexed like Date#wday (0 = zondag).
  Dagen = %w{ zondag maandag dinsdag woensdag donderdag vrijdag zaterdag }

  # Builds a Date from a free-form Dutch date string.
  #
  # Control and punctuation characters are stripped, the string is
  # downcased and split on whitespace. Each word is interpreted as:
  # * a month name    -> sets the month; when that month is earlier than
  #                      the reference month the default year is bumped by one
  # * a weekday name  -> moves the reference date forward to that weekday
  #                      (today counts) and resets the defaults from it
  # * a number <= 31  -> day of month
  # * a number > 1900 -> year
  # Anything not given falls back to the defaults taken from the reference date.
  def self.dutch(instring)
    nu = today
    defjaar, defmaand, defdag, weekdag = nu.year, nu.month, nu.day, nu.wday
    jaar = maand = dag = nil
    string = instring.gsub(/[[:cntrl:][:punct:]]/, '').downcase
    string.split.each do |elem|
      case elem
      when /[[:alpha:]]+/
        if Maanden.include?(elem)
          maand = Maanden.index(elem)
          if maand < nu.month
            defjaar += 1
          end
        elsif Dagen.include?(elem)
          wd = Dagen.index(elem) - weekdag
          wd += 7 if wd < 0
          nu += wd
          defjaar, defmaand, defdag, weekdag = nu.year, nu.month, nu.day, nu.wday
        end
      when /[[:digit:]]+/
        ent = elem.to_i
        if ent <= 31
          dag = ent
        elsif ent > 1900
          jaar = ent
        end
      end
    end
    new(jaar || defjaar, maand || defmaand, dag || defdag)
  end
end

module XMLTV
  class ValidateError < Exception; end
  class BadChannelError < Exception; end
  class BadSiteError < Exception; end

  # Hash describing one programme. Reading a missing key stores and
  # returns a fresh empty Hash for that key.
  class Progdata < Hash
    def self.new
      super { |h, v| h[v] = Hash.new }
    end
  end

  VERSION = '0.8.3'
  # Directory holding the per-site grabbers: taken from the first load path
  # entry that contains xmltv/xmltv.rb, falling back to 'lib'.
  Progdir = ($LOAD_PATH.find { |x| test(?f, "#{x}/xmltv/xmltv.rb") } || 'lib') + '/xmltv/sites'
  # Site names: the .rb files in Progdir without directory and extension.
  Sites = Dir["#{Progdir}/*.rb"].map { |x| x[0..-4] }.map { |x| File.basename(x) }

  class XmltvOptparser
    #
    # Return a structure describing the options.
    #
    # Parses +args+ destructively (recognised options are removed) and
    # returns an OpenStruct. Defaults: basedir "$HOME/.xmltv", validate true.
    # Options that list or edit channels also set +action+.
    #
    def self.parse(args)
      options = OpenStruct.new
      options.basedir = "#{ENV['HOME']}/.xmltv"
      options.validate = true
      parser = OptionParser.new do |opts|
        opts.banner = "Usage: #{$PROGRAM_NAME} [options]"
        opts.separator ""
        opts.separator "Specific options:"
        opts.on("-a", "--list-all", "List all available channels") do |av|
          options.available = av
          options.action = true
        end
        opts.on("--add x,y,z", Array, "Add channels to config") do |list|
          options.add = list
          options.action = true
        end
        opts.on("--delete x,y,z", Array, "Delete channels from config") do |list|
          options.del = list
          options.action = true
        end
        opts.on("-l", "--list", "List configured channels") do |list|
          options.list = list
          options.action = true
        end
        opts.on("-c", "--config-file FILENAME", "Configuration file") do |list|
          options.config = list
        end
        opts.on("--basedir DIRNAME", "Basedir (#{options.basedir})") do |list|
          options.basedir = list
        end
        opts.on("--no-validate", "Do not validate") do |v|
          options.validate = false
        end
        opts.on("--mailto USER", "Mail exception") do |user|
          options.mailto = user
        end
        opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
          options.verbose = v
        end
        opts.separator ""
        opts.separator "Common options:"
        opts.on_tail("-h", "--help", "Show this message") do
          STDERR.puts opts
          exit
        end
        opts.on_tail("--version", "Show version") do
          options.version = true
        end
      end
      parser.parse!(args)
      options
    end
  end

  # Parsed at load time; ARGV keeps only the non-option words afterwards.
  XmltvOptions = XmltvOptparser.parse(ARGV)

  # Collects programmes in a REXML document and writes it out.
  class XmlWriter
    # Elements that get a 'lang' attribute.
    With_lang = [ 'title', 'sub-title', 'desc', 'category', 'language', 'orig-language', 'country', 'premiere', 'last-chance' ]
    # Child elements of <programme>, in output order.
    # Each entry is [ element name, attribute names, sub-element names ];
    # the last two are optional.
    Programme_dtd = [
      [ 'title' ],
      [ 'sub-title' ],
      [ 'desc' ],
      [ 'credits', nil, [ 'director', 'actor', 'writer', 'adapter', 'producer', 'presenter', 'commentator', 'guest' ] ],
      [ 'date' ],
      [ 'category' ],
      [ 'language' ],
      [ 'orig-language' ],
      [ 'length', [ 'units' ] ],
      [ 'icon', [ 'src', 'width', 'height' ] ],
      [ 'url' ],
      [ 'country' ],
      [ 'episode-num', [ 'system' ] ],
      [ 'video', [], [ 'present', 'colour', 'aspect', 'quality' ] ],
      [ 'audio', [], [ 'present', 'stereo' ] ],
      [ 'previously-shown', [ 'start', 'channel' ] ],
      [ 'premiere' ],
      [ 'last-chance' ],
      [ 'new' ],
      [ 'subtitles', [ 'type' ], [ 'language' ] ],
      [ 'rating', [ 'system' ], [ 'value', 'icon' ] ],
      [ 'star-rating', [], [ 'value', 'icon' ] ]
    ]

    # Starts an empty <tv> document for +grabber+. The DOCTYPE system id
    # comes from the grabber's config key 'dtd', with a local default.
    def initialize(grabber)
      @grabber = grabber
      @doc = doc = REXML::Document.new
      doc << REXML::XMLDecl.new("1.0", "UTF-8")
      dtd = grabber.config['dtd'] || 'file:///usr/share/xmltv/xmltv.dtd'
      doc << REXML::DocType.new('tv', %Q{SYSTEM "#{dtd}"})
      @el_tv = doc.add_element( 'tv' )
      @el_tv.attributes['generator-info-name'] = 'xmltv.rb'
    end

    # Appends one <programme> element built from +progdata+.
    #
    # 'start' and 'stop' must respond to strftime; 'channel' becomes the
    # channel attribute. The remaining keys are emitted in Programme_dtd
    # order; keys whose value is empty are skipped.
    # Raises ArgumentError unless +progdata+ is a Progdata.
    def add_dtd(progdata)
      raise ArgumentError.new("add_dtd must have a Progdata") unless progdata.is_a?(Progdata)
      prog = @el_tv.add_element('programme')
      %w{ start stop }.each do |word|
        prog.attributes[word] = progdata[word].strftime('%Y%m%d%H%M%S %Z')
      end
      prog.attributes['channel'] = progdata['channel']
      Programme_dtd.each do |name, attrs, elements|
        # STDERR.puts name
        # Missing keys read as an empty Hash (see Progdata) and are skipped here.
        next if (cur = progdata[name]).empty?
        node = prog.add_element(name)
        node.text = cur if cur.is_a? String
        if With_lang.include? name
          node.attributes['lang'] = @grabber.lang
        end
        if attrs
          attrs.each do |a|
            node.attributes[a] = cur[a] if cur[a]
          end
        end
        if elements
          elements.each do |e|
            case cur[e]
            when String
              node.add_element(e).text = cur[e]
            when Array
              cur[e].each do |r|
                node.add_element(e).text = r
              end
            end
          end
        end
      end
    end

    # Adds every entry of +pda+ to the document and writes the result to
    # the grabber's output file for +chan_id+.
    #
    # With validation enabled the XML is piped through
    # "xmllint --valid - | tv_sort" and ValidateError is raised when that
    # pipeline wrote anything to the error file; otherwise the document is
    # written directly. A failing entry is dumped to STDERR and re-raised.
    def write_file(pda, chan_id)
      pda.each do |progdata|
        begin
          add_dtd(progdata)
        rescue StandardError => exc
          STDERR.puts exc, exc.message, exc.backtrace, '===='
          PP.pp(progdata, STDERR)
          raise
        end
      end
      file = ">#{@grabber.outputfile(chan_id)}"
      # NOTE(review): fixed, shared path that is interpolated into a shell
      # command together with the output file name -- consider a Tempfile
      # and shell escaping.
      errorfile = '/var/tmp/xmllint-errors'
      if XmltvOptions.validate
        IO.popen(" ( xmllint --valid - | tv_sort #{file} ) 2>#{errorfile}", 'w') do |h|
          write_xml h
        end
        if test(?s, errorfile)
          raise ValidateError.new("zie #{errorfile}")
        end
      else
        File.open("#{@grabber.outputfile(chan_id)}", 'w') do |h|
          write_xml h
        end
      end
    end

    # Serialises the document to IO +h+, followed by a newline.
    def write_xml(h, indent = -1)
      @doc.write(h, indent)
      h.puts
    end
  end

  # Base class for site grabbers. Subclasses must provide grab_channel and
  # transform; this class also calls fetch_all_channels and clean_cache,
  # which are not defined in this file.
  class Grabber
    Dag = 24 * 60 * 60
    Vandaag = Date.today
    attr_accessor :myname, :chnbasedir, :spooldir, :channel_list, :lang, :generator, :base_url, :config_file_name
    attr_accessor :config, :all_channels, :reject_file_name

    # Optional mapping file in the base directory; absent file means no mapping.
    mtv = begin
      YAML.load_file("#{XmltvOptions.basedir}/mythtv_chns.yaml")
    rescue Errno::ENOENT
      Hash.new
    end
    # Inverted mapping: each name listed in info[0] points back to its key.
    MythTV = Hash.new
    mtv.each do |myth, info|
      info[0].each do |file|
        MythTV[file] = myth
      end
    end

    # Placeholder: prints a message and exits. Subclasses override it.
    def grab_channel(chan_id)
      STDERR.puts "Grabber must implement grab_channel(chan_id)"
      exit
    end

    # Placeholder: prints a message and exits. Subclasses override it.
    def transform(chan_id)
      STDERR.puts "Grabber must implement transform(chan_id)"
      exit
    end

    # Derives the grabber name from the class name, loads the configuration,
    # creates the channel base and spool directories when missing and loads
    # the list of all channels.
    def initialize
      @hits = 0
      # @myname = File.basename($PROGRAM_NAME).gsub(/\..*/, '')
      # Drops the first 7 and last 7 characters of the class name
      # (presumably "XMLTV::" and "Grabber" -- verify against the site classes).
      @myname = self.class.to_s[7..-8].downcase
      @chnbasedir = "#{XmltvOptions.basedir}/#{myname}"
      @config_file_name = XmltvOptions.config || "#{chnbasedir}/config"
      @reject_file_name = "#{chnbasedir}/rejects"
      @config = load_config_file
      @lang = 'nl'
      @generator = "#{myname}.#{lang}"
      @base_url = "http://www.#{generator}"
      @spooldir = config['spooldir'] || "#{chnbasedir}/spool"
      [ chnbasedir, spooldir ].each do |dir|
        FileUtils.mkdir_p(dir) unless test(?d, dir)
      end
      @channel_list = "#{chnbasedir}/channel_list"
      @all_channels = load_channel_file(channel_list)
    end

    def version
      "XMLTV::Grabber version 0.8"
    end

    # Path of the XML file written for +chan_id+.
    def outputfile(chan_id)
      "#{spooldir}/#{channel_name(chan_id)}.xml"
    end

    # "<chan_id>.<grabber name>.<language>"
    def channel_name(chan_id)
      "#{chan_id}.#{myname}.#{lang}"
    end

    # Entry of +chan_id+ in the list of all channels (nil when unknown).
    def channel_display(chan_id)
      all_channels[chan_id]
    end

    # Path of the YAML cache file for +chan_id+.
    def cachefile(chan_id)
      "#{chnbasedir}/#{channel_name(chan_id)}.yaml"
    end

    # Loads the cache of +chan_id+; a missing file yields an empty Hash.
    def load_cachefile(chan_id)
      begin
        YAML.load_file(cachefile(chan_id))
      rescue Errno::ENOENT
        Hash.new
      end
    end

    # Expands the special first words 'all' and 'new' to every known
    # channel ('new' additionally sets XmltvOptions.only_new); any other
    # argument list is returned unchanged.
    def check_argv(args)
      case args[0]
      when 'all'
        all_channels.keys
      when 'new'
        XmltvOptions.only_new = true
        all_channels.keys
      else
        args
      end
    end

    # Cleans spool and cache, then returns the channels to grab: the
    # (expanded) command line arguments when given, else the configured ones.
    def get_channels
      clean_cache_dir
      if ! ARGV.empty?
        ARGV.replace(check_argv(ARGV))
      else
        config_channels
      end
    end

    # Loads the configuration file. Falls back to a configuration with an
    # empty channel list when the file cannot be loaded; anything other
    # than a missing file is reported on STDERR, so that a damaged or
    # unreadable configuration does not pass unnoticed.
    def load_config_file
      YAML.load_file(config_file_name)
    rescue Errno::ENOENT
      { 'channels' => [] }
    rescue StandardError => exc
      STDERR.puts "#{config_file_name}: cannot load configuration (#{exc.class}: #{exc.message}); using empty channel list"
      { 'channels' => [] }
    end

    # Loads the channel list from +fn+; when the file is missing the list
    # is obtained from fetch_all_channels. Returns nil when +fn+ is nil.
    def load_channel_file(fn)
      if (fn)
        begin
          YAML.load_file(fn)
        rescue Errno::ENOENT
          fetch_all_channels
        end
      end
    end

    # Writes the configuration as YAML, creating the directory when needed.
    # Raises when the directory's path exists but is not a directory.
    # (Written without a retry loop: a configuration path that exists but
    # is not a regular file now raises from File.open instead of spinning.)
    def save_config
      dir = File.dirname(config_file_name)
      if test(?e, dir) && ! test(?d, dir)
        raise "#{dir} exists, but isn't a directory"
      end
      FileUtils.mkdir_p dir
      File.open(config_file_name, 'w') { |h| h.puts config.to_yaml }
    end

    # Configured channels, or every known channel when none are configured.
    def config_channels
      rsl = config['channels']
      if rsl.empty?
        all_channels.keys
      else
        rsl
      end
    end

    # Deletes spooled XML files that belong to no configured channel.
    def clean_spool_dir
      xmlfiles = config_channels.map { |x| outputfile(x) }
      Dir["#{spooldir}/*.xml"].each do |fn|
        unless xmlfiles.include? fn
          STDERR.puts("removed #{fn} -- not configured")
          File.unlink fn
        end
      end
    end

    # Cleans the spool directory, then (unless the config says this was
    # already done today) deletes cache files of unconfigured channels and
    # lets clean_cache prune the remaining ones, rewriting changed caches.
    def clean_cache_dir
      clean_spool_dir
      last_time = config['cleaned']
      if last_time.is_a?(Date) && last_time >= Vandaag
        return
      end
      count = 0
      channels = config_channels.map { |x| cachefile(x) }
      Dir["#{chnbasedir}/*.yaml"].each do |fn|
        unless channels.include? fn
          STDERR.puts("removed #{fn} -- not configured")
          File.unlink fn
          next
        end
        cache = YAML.load_file(fn)
        dels = clean_cache(cache)
        File.open(fn, 'w') { |h| h.puts cache.to_yaml } if dels > 0
        count += dels
        config['cleaned'] = Vandaag
      end
      STDERR.puts("#{myname}: Removed #{count} entries") if count > 0
    end

    # Builds a Progdata from +entry+, copying only keys that are simple
    # elements in XmlWriter::Programme_dtd (no attributes or sub-elements),
    # and sets 'channel' to the MythTV name of the channel when one is
    # mapped, else to the channel name.
    def proghash(entry, chan_id)
      progdata = Progdata.new
      entry.keys.each do |k|
        dtd = XmlWriter::Programme_dtd.assoc(k)
        if dtd && dtd.size == 1
          progdata[k] = entry[k]
        end
      end
      cnm = channel_name(chan_id)
      progdata['channel'] = MythTV[cnm] || cnm
      progdata
    end

    # Appends a timestamped record of +args+ to the rejects file; strings,
    # times and integers are written as is, everything else pretty-printed.
    def reject(*args)
      File.open(reject_file_name, 'a') do |h|
        h.puts Time.now
        args.each do |arg|
          case arg
          when String, Time, Integer
            h.puts arg
          else
            PP.pp(arg, h)
          end
        end
        h.puts '===='
      end
    end

    # Repairs start/stop times in the programme list +pda+ (in place).
    #
    # 1. Entries whose stop is not after their start are rejected.
    # 2. In start order (longest first on equal start), every entry that
    #    lacks a stop or overlaps its successor gets the successor's start
    #    as stop; each such correction counts as an error.
    # 3. The last entry is dropped when it has no stop, as is any entry
    #    that ended up with stop <= start.
    # Sets @rejects to the number of removed entries and returns the error
    # count. An empty list is returned untouched with zero errors.
    def fix_times(pda)
      errors = 0
      startwith = pda.size
      pda.delete_if do |x|
        rsl = x.has_key?('stop') && x['stop'] <= x['start']
        reject('invalid', x) if rsl
        rsl
      end
      starttimes = pda.sort do |x, y|
        if (a = x['start']) != (b = y['start'])
          a <=> b
        else
          a = x.has_key?('stop') ? x['stop'] : x['start']
          b = y.has_key?('stop') ? y['stop'] : y['start']
          b <=> a
        end
      end
      starttimes.each_with_index do |entry, idx|
        # dump(entry, '++++++++++++++') if entry.has_key?('title') && entry['title'] == 'NOS Studio Voetbal'
        nxt = starttimes[idx + 1]
        next unless nxt
        unless entry.has_key?('stop') && entry['stop'] <= nxt['start']
          errors += 1
          entry['stop'] = nxt['start'].dup
        end
      end
      # Guard against an empty list: there is no last entry to inspect then.
      last_entry = starttimes[-1]
      if last_entry && ! last_entry.has_key?('stop')
        # Marks the entry for removal below; its end time is unknown.
        last_entry['start'] = nil
      end
      pda.delete_if do |x|
        rsl = x['start'].nil? || x['stop'] <= x['start']
        reject('removed', x) if rsl
        rsl
      end
      @rejects = startwith - pda.size
      errors
    end

    # Opens +url+ and parses it with Hpricot, counting the hit. A refused
    # connection or timeout is retried after 2 seconds, up to 3 retries;
    # after that BadSiteError is raised.
    def fetch(url)
      @hits += 1
      tries = 0
      begin
        open(url) { |h| Hpricot(h) }
      rescue Errno::ECONNREFUSED, ::Timeout::Error
        tries += 1
        sleep 2
        retry if tries <= 3
        raise BadSiteError.new
      end
    end

    # Writes +obj+ as YAML to +filename+.
    def save_object(obj, filename)
      File.open(filename, 'w') do |h|
        h.puts obj.to_yaml
      end
    end

    # Debug aid: writes each argument to /tmp/xmltv_exception.<pid>-<index>.
    def save(*args)
      file = "/tmp/xmltv_exception.#{$$}"
      args.each_with_index do |el, i|
        File.open("#{file}-#{i}", 'w') do |h|
          h.puts el
        end
      end
      STDERR.puts "See #{file}-*"
    end

    # Debug aid: pretty-prints each argument to STDERR.
    def dump(*args)
      args.each do |arg|
        PP.pp(arg, STDERR)
      end
    end

    # Records the date of +time+ as a day with data for channel +chan+.
    def date_stats(chan, time)
      @days_av[chan].add(time.to_date)
    end

    # True when +chan_id+ is in the list of all channels.
    def check_channel(chan_id)
      all_channels.has_key?(chan_id)
    end

    # Adds the channels given with --add to the configuration and saves it.
    # Unknown or already configured channels are reported on STDERR.
    def add_channels_to_config
      XmltvOptions.add.each do |item|
        if (f = all_channels.has_key?(item)) && ! config['channels'].include?(item)
          config['channels'] << item
          STDERR.puts("added #{item} #{channel_display(item)}") if XmltvOptions.verbose
        else
          reason = f ? "already included" : "not in channel list"
          STDERR.puts "#{item} #{reason}"
        end
      end
      save_config
    end

    # Removes the channels given with --delete from the configuration and
    # saves it. Channels that are not configured are reported on STDERR.
    def delete_channels_from_config
      XmltvOptions.del.each do |item|
        if config['channels'].include?(item)
          config['channels'].delete(item)
          STDERR.puts("deleted #{item} #{channel_display(item)}") if XmltvOptions.verbose
        else
          STDERR.puts "#{item} not in config list"
        end
      end
      save_config
    end

    # Prints one channel id with its display entry.
    def printline(it)
      printf("%-5s %s\n", it, channel_display(it))
    end

    # Prints the channels in +array+, ordered by their display entry.
    def do_list(array)
      array.sort_by { |x| channel_display(x) }.each do |item|
        printline item
      end
    end

    # Prints the configured channels, or a note when there are none.
    def list_config
      if config['channels'].empty?
        explan = channel_list ? 'yet' : 'needed'
        puts "No configuration #{explan} for #{myname}"
      else
        puts "Configured channels for #{myname}"
        do_list(config['channels'])
      end
    end

    # Prints every known channel.
    def list_all
      puts "Available channels for #{myname}"
      do_list(all_channels.keys)
    end

    # Executes the list/add/delete options and exits.
    def do_options
      if XmltvOptions.list
        list_config
      elsif XmltvOptions.available
        list_all
      else
        if XmltvOptions.add
          add_channels_to_config
        end
        if XmltvOptions.del
          delete_channels_from_config
        end
        list_config
      end
      exit
    end

    # Prints the statistics line for +channel+ to STDERR and resets the hit
    # counter. The byte count is the size of the output file; a missing or
    # empty file is reported as 0 (Kernel#test returns nil in that case,
    # which %d cannot format).
    def report(channel, nprogs, errors)
      bytes = test(?s, outputfile(channel)) || 0
      STDERR.printf(" %s %-6.6s %-20.20s %5d %5d %5d %5d %8d %5d\n",
                    Time.now.strftime("%H:%M:%S"),
                    channel,
                    channel_display(channel),
                    nprogs,
                    @hits,
                    @days_av[channel].size,
                    errors,
                    bytes,
                    @rejects)
      @hits = 0
    end

    # Main entry point of a grabber.
    #
    # Handles --version and the list/add/delete actions (both exit), refuses
    # to run without a configuration file, then grabs, transforms, repairs
    # and writes every selected channel, printing one report line each.
    # Any exception other than SystemExit is printed, optionally mailed to
    # XmltvOptions.mailto and re-raised. The configuration is saved at the end.
    def run
      if XmltvOptions.version
        puts version
        exit
      end
      do_options if XmltvOptions.action
      if channel_list && ! test(?f, config_file_name)
        # We don't want to grab _all_ channels, surely ?
        list_all
        puts "\n Do #{$PROGRAM_NAME} --add chn1,chn2,chn3... to create a configuration file"
        exit
      end
      # Command line words that name a method are called and removed.
      # NOTE(review): this is an unrestricted send on user input, and a
      # NoMethodError raised inside the called method is swallowed as well.
      ARGV.delete_if do |arg|
        begin
          send(arg)
          true
        rescue NoMethodError
          false
        end
      end
      trap('INT') do
        STDERR.puts "\nInterrupted\n"
        raise ArgumentError.new('interrupt')
        # exit!
      end
      begin
        channels = get_channels
        STDERR.printf("\n%-40s prgs hts days errs bytes rej\n", "#{Date.today}: #{myname}")
        channels.each do |channel|
          next if XmltvOptions.only_new && test(?f, outputfile(channel))
          @days_av = Hash.new { |h, k| h[k] = Set.new }
          @rejects = 0
          writer = XmlWriter.new(self)
          unless check_channel(channel)
            STDERR.puts "No such channel #{channel}"
            next
          end
          begin
            nprogs = grab_channel(channel)
            pda = transform(channel)
            errors = fix_times(pda)
          rescue BadChannelError
            STDERR.puts "Zie #{reject_file_name}"
            next
          end
          writer.write_file(pda, channel)
          report(channel, nprogs, errors)
        end
      rescue SystemExit
        # An exit inside the loop ends grabbing; the configuration is still saved.
      rescue ::Exception => exc
        STDERR.puts exc.class, exc.message, exc.backtrace
        if XmltvOptions.mailto
          MailSender.sendmail(XmltvOptions.mailto, "Xmltv exception", [exc.class, exc.message, exc.backtrace].join("\n"))
        end
        raise
      end
      save_config
    end
  end

  # Runs every site grabber that has a configuration directory below the
  # base directory, by requiring "xmltv/sites/<name>" in name order.
  # Channel configuration options are refused here because they need a
  # specific site. A site that exits unsuccessfully aborts the run.
  def go
    if XmltvOptions.add || XmltvOptions.del || XmltvOptions.config
      STDERR.puts "You must specify a site to configure channels"
      exit
    end
    # File.directory? is false for dangling symlinks instead of raising.
    configs = Dir["#{XmltvOptions.basedir}/*"].select { |x| File.directory?(x) }.map { |x| File.basename(x) }
    if configs.empty?
      STDERR.puts "No configs found in #{XmltvOptions.basedir}"
    end
    todo = configs & Sites
    exit if todo.empty?
    todo.sort.each do |grabber|
      # puts grabber
      begin
        require "xmltv/sites/#{grabber}"
      rescue SystemExit => exc
        raise unless exc.success?
      end
    end
  end
  module_function :go
end

if __FILE__ == $PROGRAM_NAME
  XMLTV::go
end