=begin Copyright 2010, Roger Pack This file is part of Sensible Cinema. Sensible Cinema is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Sensible Cinema is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Sensible Cinema. If not, see . =end require 'sane' class EdlParser EDL_DIR = File.expand_path(__DIR__ + "/../zamples/edit_decision_lists/dvds") if File::ALT_SEPARATOR EDL_DIR.gsub! File::SEPARATOR, File::ALT_SEPARATOR # to_filename... end # returns {"mutes" => [["00:00", "00:00", string1, string2, ...], ...], "blank_outs" -> [...], "url" => ...} def self.parse_file filename, expand = true output = parse_string File.read(filename), filename, [] # now respect a few options if relative = output["take_from_relative_file"] new_filename = File.dirname(filename) + '/' + relative new_input = parse_file new_filename output.merge! new_input end require_relative 'gui/sensible-cinema-dependencies' # for download method... if expand if output["from_url"] # replacement downloaded = SensibleSwing::MainWindow.download_to_string(output["from_url"]) output = parse_string downloaded # full replacement end if imdb_id = output["imdb_id"] parse_imdb output, imdb_id end end output end def self.parse_imdb output, imdb_id require_relative 'convert_thirty_fps' url = "http://www.imdb.com/title/#{imdb_id}/parentalguide" all = SensibleSwing::MainWindow.download_to_string(url) header, violence_word, violence_section, profanity_word, profanity_section, alcohol_word, alcohol_section, frightening_word, frightening_section = sections = all.split(/(Violence|Profanity|Alcohol|Frightening)/) header = sections.shift all ={} while(!sections.empty?) # my klugey to_hash method word_type = sections.shift settings = sections.shift assert word_type.in? ['Violence', 'Profanity', 'Alcohol', 'Frightening'] all[word_type] = settings end # blank_outs or mutes for each... # TODO make the -> optional split_into_timestamps = /([\d:]+(?:\.\d+|))\W*->\W*([\d:]+(?:\.\d+|))([^\d\n]+)/ for type, settings in all settings.scan(split_into_timestamps) do |begin_ts, end_ts, description| puts "parsing from wiki imdb entry violence: #{begin_ts} #{end_ts} #{description} #{type}" start_seconds = translate_string_to_seconds begin_ts end_seconds = translate_string_to_seconds end_ts # convert from 30 to 29.97 fps ... we presume ... start_seconds = ConvertThirtyFps.from_twenty_nine_nine_seven start_seconds start_seconds = ("%.02f" % start_seconds).to_f # round start_seconds = translate_time_to_human_readable start_seconds, true end_seconds = ConvertThirtyFps.from_twenty_nine_nine_seven end_seconds end_seconds = ("%.02f" % end_seconds).to_f # round end_seconds = translate_time_to_human_readable end_seconds, true p end_seconds if type == 'Profanity' output['mutes'] << [start_seconds, end_seconds] else output['blank_outs'] << [start_seconds, end_seconds] end end end end private def self.download full_url, to_here require 'open-uri' writeOut = open(to_here, "wb") writeOut.write(open(full_url).read) writeOut.close end # better eye-ball these before letting people run them, eh? TODO # but I couldn't think of any other way to parse the files tho def self.parse_string string, filename, ok_categories_array = [] string = '{' + string + "\n}" if filename raw = eval(string, binding, filename) else raw = eval string end raise SyntaxError.new("maybe missing quotation marks?" + string) if raw.keys.contain?(nil) # mutes and blank_outs need to be special parsed into arrays... mutes = raw["mutes"] || [] blanks = raw["blank_outs"] || [] raw["mutes"] = convert_to_timestamp_arrays(mutes, ok_categories_array) raw["blank_outs"] = convert_to_timestamp_arrays(blanks, ok_categories_array) raw end # converts "blanks" => ["00:00:00", "00", "reason", "01", "01", "02", "02"] into sane arrays, also filters based on category, though disabled for production def self.convert_to_timestamp_arrays array, ok_categories_array out = [] while(single_element = extract_entry!(array)) # assume that it (could be, at least) start_time, end_time, category, number category = single_element[-2] category_number = single_element[-1] include = true if ok_categories_array.index([category, category_number]) include = false elsif ok_categories_array.index([category]) include = false elsif ok_categories_array.detect{|cat, setting| setting.is_a? Fixnum} for cat, setting in ok_categories_array if cat == category && setting.is_a?(Fixnum) # check for a number for filtering out based on level if category_number.to_i.to_s == category_number as_number = category_number.to_i if as_number < setting include = false end end end end end out << single_element if include end out end #TimeStamp = /(^\d+:\d\d[\d:\.]*$|\d+)/ # this one also allows for 4444 [?] and also weirdness like "don't kill the nice butterfly 2!" ... TimeStamp = /(^\d+:\d\d[\d:\.]*|\d+\.\d+)$/ # allow 00:00:00 00:00:00.0 1222.4 # disallow 1905 too but in the code # starts with a digit, has at least one colon followed by two digits,then some combo of digits and colons and periods... def self.extract_entry! from_this return nil if from_this.length == 0 # two digits, then whatever else you see, that's not a digit... out = from_this.shift(2) out.each{|d| unless d =~ TimeStamp raise SyntaxError.new('non timestamp? ' + d) end } while(from_this[0] && from_this[0] !~ TimeStamp) raise SyntaxError.new('straight digits not allowed use 1000.0 instead') if from_this[0] =~ /^\d+$/ out << from_this.shift end out end def self.get_secs timestamp_string_begin, timestamp_string_end, add_begin, add_end, splits answers = [] unless timestamp_string_begin raise 'non begin' end unless timestamp_string_end raise 'non end' end for type, offset, multiplier in [[timestamp_string_begin, add_begin, -1], [timestamp_string_end, add_end, 1]] original_secs = translate_string_to_seconds(type) + offset # now if splits is 900 and we'are at 909, then we're just 9 closest_split_idx = splits.reverse.index{|t| t < original_secs} if closest_split_idx closest_split = splits.reverse[closest_split_idx] # add some extra seconds onto these if they're "past" a split, too original_secs = original_secs - closest_split + multiplier * (splits.length - closest_split_idx) original_secs = [0, original_secs].max # no negatives allowed :) end answers << original_secs end answers end public # called later, from external # divides up mutes and blanks so that they don't overlap, preferring blanks over mutes # returns it like [[start,end,type], [s,e,t]...] type like either :blank and :mute # [[70.0, 73.0, :blank], [378.0, 379.1, :mute]] def self.convert_incoming_to_split_sectors incoming, add_this_to_all_ends = 0, subtract_this_from_beginnings = 0, splits = [] raise if subtract_this_from_beginnings < 0 raise if add_this_to_all_ends < 0 if splits != [] # allow it to do all the double checks we later skip, just in case :) self.convert_incoming_to_split_sectors incoming end mutes = incoming["mutes"] || {} blanks = incoming["blank_outs"] || {} mutes = mutes.map{|k, v| get_secs(k, v, -subtract_this_from_beginnings, add_this_to_all_ends, splits) + [:mute]} blanks = blanks.map{|k, v| get_secs(k, v, -subtract_this_from_beginnings, add_this_to_all_ends, splits) + [:blank]} combined = (mutes+blanks).sort # detect overlap... previous = nil combined.each_with_index{|current, idx| s,e,t = current if e < s raise SyntaxError.new("detected an end before a start: #{e} < #{s}") if e < s unless splits.length > 0 end if previous ps, pe, pt = previous if (s < pe) raise SyntaxError.new("detected an overlap #{[s,e,t].join(' ')} #{previous.join(' ')}") unless splits.length > 0 # our start might be within the previous' in which case its their start, with (greater of our, their ending) preferred_end = [e,pe].max preferred_type = [t,pt].detect{|t| t == :blank} || :mute # prefer blank to mute combined[idx-1] = [ps, preferred_end, preferred_type] combined[idx] = nil # allow it to be culled later end end previous = current } combined.compact end # this one is 1:01:02.0 => 36692.0 # its reverse is: translate_time_to_human_readable def self.translate_string_to_seconds s # might actually already be a float, or int, depending on the yaml # int for 8 => 9 and also for 1:09 => 1:10 if s.is_a? Numeric return s.to_f end # s is like 1:01:02.0 total = 0.0 seconds = nil begin seconds = s.split(":")[-1] rescue Exception => e p 'failed!', s raise e end raise unless seconds =~ /^\d+(|[,.]\d+)$/ seconds.gsub!(',', '.') total += seconds.to_f minutes = s.split(":")[-2] || "0" total += 60 * minutes.to_i hours = s.split(":")[-3] || "0" total += 60* 60 * hours.to_i total end # its reverse: translate_string_to_seconds def self.translate_time_to_human_readable seconds, force_hour_stamp = false # 3600 => "1:00:00" out = '' hours = seconds.to_i / 3600 if hours > 0 || force_hour_stamp out << "%d" % hours out << ":" end seconds = seconds - hours*3600 minutes = seconds.to_i / 60 out << "%02d" % minutes seconds = seconds - minutes * 60 out << ":" # avoid an ugly .0 at the end if seconds == seconds.to_i out << "%02d" % seconds else out << "%05.2f" % seconds # man that printf syntax is tricky... end end def self.all_edl_files_parsed use_all_not_just_dvds dir = EDL_DIR dir += "/.." if use_all_not_just_dvds Dir[dir + '/**/*.txt'].map{|filename| begin parsed = parse_file(filename) [filename, parsed] rescue SyntaxError => e # ignore poorly formed edit lists for the auto choose phase... p 'warning, unable to parse a file:' + filename + " " + e.to_s nil end }.compact end # returns single matching filename def self.find_single_edit_list_matching use_all = false matching = all_edl_files_parsed(use_all).map{|filename, parsed| yield(parsed) ? filename : nil }.compact if matching.length == 1 file = matching[0] p "selecting the one only matching EDL: #{file}" file elsif matching.length > 1 p "found multiple matches for media? #{matching.inspect}" nil else nil end end def self.single_edit_list_matches_dvd dvd_id return nil unless dvd_id find_single_edit_list_matching {|parsed| parsed["disk_unique_id"] == dvd_id } end end # == 1.8.7 1.9 Symbol compat class Symbol # Standard in ruby 1.9. See official documentation[http://ruby-doc.org/core-1.9/classes/Symbol.html] def <=>(with) return nil unless with.is_a? Symbol to_s <=> with.to_s end unless method_defined? :"<=>" end if $0 == __FILE__ p 'syntax: filename' require 'rubygems' require 'sane' p EdlParser.parse_file(*ARGV) end