#!/usr/bin/env ruby
# encoding: UTF-8

require 'bundler/setup'

require 'image_optim'
require 'image_optim/cmd'
require 'progress'
require 'shellwords'
require 'gdbm'
require 'digest'
require 'erb'
require 'ostruct'
require 'delegate'
require 'English'
require 'yaml'

DIR = 'tmp'.freeze
Pathname(DIR).mkpath

Array.class_eval do
  # For an array of arrays with possible values yields arrays with all
  # combinations of values
  #
  #   [[1, 2], 3, [4, 5]].variants{ |v| p v }
  #   # [1, 3, 4]
  #   # [1, 3, 5]
  #   # [2, 3, 4]
  #   # [2, 3, 5]
  def variants(&block)
    if block
      if empty?
        yield([])
      else
        head, *tail = map(&method(:Array))
        head.product(*tail, &block)
      end
      self
    else
      enum_for(:variants)
    end
  end

  # Sum elements or results of running block on elements
  def sum(initial = 0, &block)
    if block
      reduce(initial){ |memo, item| memo + block[item] }
    else
      reduce(initial, :+)
    end
  end
end

Hash.class_eval do
  # For a hash with arrays of possible values yields hashes with all
  # combinations of keys mapped to value
  #
  #   {:a => [1, 2], :b => 3, :c => [4, 5]}.variants{ |v| p v }
  #   # {:a=>1, :b=>3, :c=>4}
  #   # {:a=>1, :b=>3, :c=>5}
  #   # {:a=>2, :b=>3, :c=>4}
  #   # {:a=>2, :b=>3, :c=>5}
  def variants
    if block_given?
      if empty?
        yield({})
      else
        keys, values = to_a.transpose
        values.variants do |variant|
          yield Hash[keys.zip(variant)]
        end
      end
      self
    else
      enum_for(:variants)
    end
  end
end

Process.times.class.class_eval do
  def sum
    utime + stime + cutime + cstime
  end
end

ImageOptim::Path.class_eval do
  def shellescape
    to_s.shellescape
  end

  def digest
    @digest ||= Digest::SHA256.file(to_s).hexdigest
  end

  def etag
    [mtime, digest]
  end
end

# Analyse efficiency of workers
class Analyser
  Cmd = ImageOptim::Cmd
  HashHelpers = ImageOptim::HashHelpers

  # Caching entries using GDBM
  class Cache
    DB = GDBM.new("#{DIR}/worker-analysis.db")

    class << self
      def get(context, key, etag, &block)
        full_key = [context, key]
        if block
          get!(full_key, etag) || set!(full_key, etag, &block)
        else
          get!(full_key, etag)
        end
      end

      def set(context, key, etag, &block)
        set!([context, key], etag, &block)
      end

      private

      def get!(key, etag)
        raw = DB[Marshal.dump(key)]
        return unless raw
        entry = Marshal.load(raw)
        return unless entry[1] == etag
        entry[0]
      end

      def set!(key, etag, &_block)
        value = yield
        DB[Marshal.dump(key)] = Marshal.dump([value, etag])
        value
      end
    end
  end
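
  # A minimal usage sketch of Cache (it mirrors the call made in
  # WorkerRunner#run_worker further down): entries are looked up by
  # [context, key] and reused only while the stored etag still matches,
  # otherwise the block recomputes and stores the value.
  #
  #   Cache.get(:result, [src.digest, worker.id], worker.etag) do
  #     StepResult.run(src, worker)
  #   end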

  # Delegate to worker with short id
  class WorkerVariant < DelegateClass(ImageOptim::Worker)
    attr_reader :name, :id, :cons_id

    def initialize(klass, image_optim, options)
      allow_consecutive_on = Array(options.delete(:allow_consecutive_on))
      @image_optim = image_optim
      @name = klass.bin_sym.to_s + options_string(options)
      __setobj__(klass.new(image_optim, options))
      @id = klass.bin_sym.to_s + options_string(self.options)
      @cons_id = [klass, allow_consecutive_on.map{ |key| [key, send(key)] }]
    end

    def etag
      [
        id,
        bin_versions,
        source_digest,
      ]
    end

    private

    def bin_versions
      @bin_versions ||= used_bins.map do |name|
        @image_optim.resolve_bin!(name).to_s
      end
    end

    def source_digest
      @digest ||= begin
        source_path = __getobj__.method(:optimize).source_location[0]
        Digest::SHA256.file(source_path).hexdigest
      end
    end

    def options_string(options)
      return '' if options.empty?
      "(#{options.sort.map{ |k, v| "#{k}:#{v.inspect}" }.join(', ')})"
    end
  end

  # One worker result
  StepResult = Struct.new(*[
    :worker_id,
    :success,
    :time,
    :src_size,
    :dst_size,
    :cache,
  ]) do
    def self.run(src, worker)
      dst = src.temp_path
      start = Process.times.sum
      success = worker.optimize(src, dst)
      time = Process.times.sum - start

      dst_size = success ? dst.size : nil
      digest = (success ? dst : src).digest
      cache = digest.sub(/../, '\0/') + ".#{src.image_format}"
      result = new(worker.id, success, time, src.size, dst_size, cache)

      if success
        path = result.path
        unless path.exist?
          path.dirname.mkpath
          dst.rename(path)
        end
      end

      result
    end

    def size
      success ? dst_size : src_size
    end

    def path
      ImageOptim::Path.convert("#{DIR}/worker-analysis/#{cache}")
    end

    # Compact one line summary for debugging output
    def inspect
      "<Step #{worker_id} #{success ? 'ok' : 'failed'} #{src_size}->#{size} #{time}s>"
    end
  end

  # Chain of workers result
  ChainResult = Struct.new(*[
    :format,
    :steps,
    :difference,
  ]) do
    def worker_ids
      steps.map(&:worker_id)
    end

    def time
      steps.sum(&:time)
    end

    def src_size
      steps.first.src_size
    end

    def dst_size
      steps.last.size
    end

    def ratio
      dst_size.to_f / src_size
    end

    # Compact one line summary for debugging output
    def inspect
      "<Chain #{format} #{worker_ids.join('+')} #{src_size}->#{dst_size} #{difference}>"
    end
  end

  # Run all possible worker chains
  class WorkerRunner
    def initialize(path, workers)
      @path = ImageOptim::Path.convert(path)
      @workers = workers
    end

    def results
      results = []
      run_workers(@path, @workers){ |result| results << result }
      run_cache.clear
      results
    end

    private

    def run_cache
      @run_cache ||= Hash.new{ |h, k| h[k] = {} }
    end

    def with_progress(workers, last_result, &block)
      if !last_result || last_result.steps.length < 3
        workers.with_progress(&block)
      else
        workers.each(&block)
      end
    end

    def run_workers(src, workers, last_result = nil, &block)
      with_progress(workers, last_result) do |worker|
        worker_result, result_image = run_worker(src, worker)

        steps = (last_result ? last_result.steps : []) + [worker_result]
        chain_result = ChainResult.new(src.image_format, steps)
        chain_result.difference = difference_with(result_image)
        yield chain_result

        workers_left = workers.reject do |w|
          w.cons_id == worker.cons_id || w.run_order < worker.run_order
        end
        run_workers(result_image, workers_left, chain_result, &block)
      end
    end
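
    # A sketch of what the recursion in run_workers above enumerates
    # (hypothetical workers A, B and C with distinct cons_id and increasing
    # run_order): it yields the chains [A], [A, B], [A, B, C], [A, C], [B],
    # [B, C] and [C] - the same cons_id never repeats within a chain and
    # run_order never decreases along it.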

    def run_worker(src, worker)
      run_cache[:run][[src.digest, worker.id]] ||= begin
        cache_args = [:result, [src.digest, worker.id], worker.etag]
        result = Cache.get(*cache_args)
        if !result || (result.success && !result.path.exist?)
          result = Cache.set(*cache_args) do
            StepResult.run(src, worker)
          end
        end
        [result, result.success ? result.path : src]
      end
    end

    def difference_with(other)
      run_cache[:difference][other.digest] ||=
        Cache.get(:difference, [@path.digest, other.digest].sort, nil) do
          images = [flatten_animation(@path), flatten_animation(other)]

          alpha_presence = images.map do |image|
            Cmd.capture("identify -format '%A' #{image.shellescape}")
          end
          if alpha_presence.uniq.length == 2
            images.map!{ |image| underlay_noise(image) }
          end

          nrmse_command = %W[
            convert
            #{images[0]} -auto-orient
            #{images[1]} -auto-orient
            -metric RMSE
            -compare
            -format %[distortion]
            info:
          ].shelljoin
          nrmse = Cmd.capture(nrmse_command).to_f
          unless $CHILD_STATUS.success?
            fail "failed comparison of #{@path} with #{other}"
          end
          nrmse
        end
    end

    def flatten_animation(image)
      run_cache[:flatten][image.digest] ||= begin
        if image.image_format == :gif
          flattened = image.temp_path
          Cmd.run(*%W[
            convert
            #{image.shellescape}
            -coalesce
            -append
            #{flattened.shellescape}
          ])
          unless $CHILD_STATUS.success?
            fail "failed flattening of #{image}"
          end
          flattened
        else
          image
        end
      end
    end

    def underlay_noise(image)
      run_cache[:noise][image.digest] ||= begin
        with_noise = image.temp_path
        Cmd.run(*%W[
          convert
          #{image.shellescape}
          +noise Random
          #{image.shellescape}
          -flatten
          -alpha off
          #{with_noise.shellescape}
        ])
        unless $CHILD_STATUS.success?
          fail "failed underlaying noise to #{image}"
        end
        with_noise
      end
    end
  end

  # Helper for producing statistics
  class Stats
    # Calculate statistics for chain
    class Chain
      attr_reader :worker_stats
      attr_reader :unused_workers
      attr_reader :entry_count
      attr_reader :original_size, :optimized_size, :ratio, :avg_ratio
      attr_reader :avg_difference, :max_difference, :warn_level
      attr_reader :time, :avg_time, :speed

      def initialize(worker_ids, results, ids2names)
        @worker_stats = build_worker_stats(worker_ids, results, ids2names)
        @unused_workers = worker_stats.any?(&:unused?)
        @entry_count = results.count
        @original_size = results.sum(&:src_size)
        @optimized_size = results.sum(&:dst_size)
        @ratio = optimized_size.to_f / original_size
        @avg_ratio = results.sum(&:ratio) / results.length
        @avg_difference = results.sum(&:difference) / results.length
        @max_difference = results.map(&:difference).max
        @time = results.sum(&:time)
        @avg_time = time / results.length
        @warn_level = calculate_warn_level
        @speed = calculate_speed
      end

      private

      def build_worker_stats(worker_ids, results, ids2names)
        steps_by_worker_id = results.flat_map(&:steps).group_by(&:worker_id)
        worker_ids.map do |worker_id|
          worker_name = ids2names[worker_id] || worker_id
          Worker.new(worker_name, steps_by_worker_id[worker_id])
        end
      end

      def calculate_warn_level
        case
        when max_difference >= 0.1 then 'high'
        when max_difference >= 0.01 then 'medium'
        when max_difference >= 0.001 then 'low'
        end
      end

      def calculate_speed
        case
        when time > 0 then (original_size - optimized_size) / time
        when original_size == optimized_size then 0
        else 1.0 / 0.0
        end
      end
    end

    # Worker usage
    class Worker
      attr_reader :name
      attr_reader :success_count
      attr_reader :time, :avg_time

      def initialize(name, steps)
        @name = name
        @success_count = steps.count(&:success)
        @time = steps.sum(&:time)
        @avg_time = time / steps.length
      end

      def unused?
        success_count.zero?
      end
    end

    attr_reader :name, :results, :ids2names

    def initialize(name, results, ids2names)
      @name = name.to_s
      @results = results
      @ids2names = ids2names
    end

    def each_chain(&block)
      chains = results.group_by(&:worker_ids).map do |worker_ids, results|
        Chain.new(worker_ids, results, ids2names)
      end
      chains.sort_by!{ |chain| [chain.optimized_size, chain.time] }
      chains.each(&block)
    end
  end

  def initialize(option_variants)
    option_variants = HashHelpers.deep_symbolise_keys(option_variants)
    image_optim = ImageOptim.new

    @workers_by_format = Hash.new{ |h, k| h[k] = [] }
    ImageOptim::Worker.klasses.each do |klass|
      worker_options_config = option_variants.delete(klass.bin_sym) || {}
      allow_consecutive_on = worker_options_config.delete(:allow_consecutive_on)
      worker_option_variants =
        case worker_options_config
        when Array
          worker_options_config
        when Hash
          worker_options_config.variants
        else
          fail "Array or Hash expected, got #{worker_options_config}"
        end
      worker_option_variants.each do |options|
        options = HashHelpers.deep_symbolise_keys(options)
        options[:allow_consecutive_on] = allow_consecutive_on
        worker = WorkerVariant.new(klass, image_optim, options)
        worker.image_formats.each do |format|
          @workers_by_format[format] << worker
        end
      end
    end

    log_workers_by_format

    fail "unknown variants: #{option_variants}" unless option_variants.empty?
  end
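
  # A worked example of what initialize above builds (values taken from the
  # help text at the bottom of this script): an optipng config of
  # {:level => [6, 7], :interlace => [true, false, nil]} is expanded by
  # Hash#variants into 6 optipng worker variants, one per option combination.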

  def analyse(paths)
    results = collect_results(paths)

    template = ERB.new(template_path.read, trim_mode: '>')
    by_format = results.group_by(&:format)
    formats = by_format.keys.sort
    basenames = Hash[formats.map do |format|
      [format, "worker-analysis-#{format}.html"]
    end]
    formats.each do |format|
      stats = Stats.new('all', by_format[format], worker_ids2names)

      path = FSPath("#{DIR}/#{basenames[format]}")
      model = {
        :stats_format => format,
        :stats => stats,
        :format_links => basenames,
        :template_dir => template_path.dirname.relative_path_from(path.dirname),
      }
      html = template.result(OpenStruct.new(model).instance_eval{ binding })
      path.write(html)
      puts "Created #{path}"
    end
  end

  private

  def worker_ids2names
    Hash[@workers_by_format.values.flatten.map do |worker|
      [worker.id, worker.name]
    end]
  end

  def collect_results(paths)
    process_paths(paths).shuffle.with_progress.flat_map do |path|
      WorkerRunner.new(path, workers_for_image(path)).results
    end
  end

  def process_paths(paths)
    paths = paths.map{ |path| ImageOptim::Path.convert(path) }
    paths.select!{ |path| path.exist? || warn("#{path} doesn't exist") }
    paths.select!{ |path| path.file? || warn("#{path} is not a file") }
    paths.select!{ |path| path.image_format || warn("#{path} is not an image") }
    paths.select! do |path|
      workers_for_image(path) || warn("#{path} can't be handled by any worker")
    end
    paths
  end

  def workers_for_image(path)
    @workers_by_format[ImageOptim::Path.convert(path).image_format]
  end

  def log_workers_by_format
    @workers_by_format.each do |format, workers|
      puts "#{format}:"
      workers.sort_by.with_index{ |w, i| [w.run_order, i] }.each do |worker|
        puts "  #{worker.name} [#{worker.run_order}]"
      end
    end
  end

  def template_path
    FSPath("#{File.dirname(__FILE__)}/template/#{File.basename(__FILE__)}.erb")
  end
end

def option_variants
  path = '.analysis_variants.yml'
  case h = YAML.load_file(path)
  when Hash then h
  when false, nil then {}
  else abort "expected a hash in #{path}"
  end
rescue Errno::ENOENT => e
  warn e
  {}
end

analyser = Analyser.new(option_variants)

if ARGV.empty?
  abort <<-HELP
Specify paths for analysis.

Example of `.analysis_variants.yml`:
  jpegtran: # 3 worker variants
  - jpegrescan: true
  - progressive: true
  - progressive: false
  optipng: # 6 worker variants by combining options
    level: [6, 7]
    interlace: [true, false, nil]
  gifsicle: # allow variants with different interlace to run consecutively
    allow_consecutive_on: interlace
    interlace: [true, false]
    careful: [true, false]
  # other workers will be used with default options
  HELP
end

analyser.analyse(ARGV)
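
# Example run (the script name and image paths are placeholders):
#
#   ruby worker-analysis images/**/*.png images/**/*.jpg
#
# A report is written to tmp/worker-analysis-<format>.html for every image
# format encountered, rendered with the ERB template expected at
# template/<script name>.erb next to this script.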