lib/sitediff.rb in sitediff-0.0.1 vs lib/sitediff.rb in sitediff-0.0.2

- old
+ new

@@ -1,35 +1,42 @@ #!/bin/env ruby -require 'sitediff/cli.rb' -require 'sitediff/config.rb' -require 'sitediff/result.rb' -require 'sitediff/uriwrapper' -require 'sitediff/util/cache' -require 'typhoeus' +require 'sitediff/config' +require 'sitediff/fetch' +require 'sitediff/result' +require 'pathname' require 'rainbow' +require 'yaml' class SiteDiff # path to misc. static files (e.g. erb, css files) FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files') # subdirectory containing all failing diffs DIFFS_DIR = 'diffs' + # files in output + FAILURES_FILE = 'failures.txt' + REPORT_FILE = 'report.html' + SETTINGS_FILE = 'settings.yaml' + # label will be colorized and str will not be. # type dictates the color: can be :success, :error, or :failure - def self.log(str, type=nil, label=nil) + def self.log(str, type=:info, label=nil) label = label ? "[sitediff] #{label}" : '[sitediff]' bg = fg = nil case type - when :success + when :info + when :diff_success bg = :green fg = :black - when :failure + when :diff_failure bg = :red - when :error + when :warn bg = :yellow fg = :black + when :error + bg = :red end label = Rainbow(label) label = label.bg(bg) if bg label = label.fg(fg) if fg puts label + ' ' + str @@ -41,90 +48,99 @@ end def after @config.after['url'] end - def cache=(file) - # FIXME: Non-global cache would be nice - return unless file - if Gem::Version.new(Typhoeus::VERSION) >= Gem::Version.new('0.6.4') - Typhoeus::Config.cache = SiteDiff::Util::Cache.new(file) - else - # Bug, see: https://github.com/typhoeus/typhoeus/pull/296 - SiteDiff::log("Cache unsupported on Typhoeus version < 0.6.4", :failure) + def initialize(config, cache, verbose=true) + @cache = cache + @verbose = verbose + + # Check for single-site mode + validate_opts = {} + if !config.before['url'] && @cache.tag?(:before) + raise SiteDiffException, + "A cached 'before' is required for single-site mode" \ + unless @cache.read_tags.include?(:before) + validate_opts[:need_before] = false end - end + config.validate(validate_opts) - def initialize(config, cache) - config.validate @config = config - self.cache = cache end - # Sanitize an HTML string based on configuration for either before or after - def sanitize(html, pos) - Sanitize::sanitize(html, @config.send(pos)) + # Sanitize HTML + def sanitize(path, read_results) + [:before, :after].map do |tag| + html = read_results[tag].content + config = @config.send(tag) + Sanitizer.new(html, config, :path => path).sanitize + end end - # Queues fetching before and after URLs with a Typhoeus::Hydra instance - # - # Upon completion of both before and after, prints and saves the diff to - # @results. - def queue_read(hydra, path) - # ( :before | after ) => ReadResult object - reads = {} - [:before, :after].each do |pos| - uri = UriWrapper.new(send(pos) + path) + # Process a set of read results + def process_results(path, read_results) + if error = read_results[:before].error || read_results[:after].error + diff = Result.new(path, nil, nil, error) + else + diff = Result.new(path, *sanitize(path, read_results), nil) + end + @results[path] = diff - uri.queue(hydra) do |res| - reads[pos] = res - next unless reads.size == 2 - - # we have read both before and after; calculate diff - if error = reads[:before].error || reads[:after].error - diff = Result.new(path, nil, nil, error) - else - diff = Result.new(path, sanitize(reads[:before].content, :before), - sanitize(reads[:after].content,:after), nil) - end - diff.log - @results[path] = diff - end + # Print results in order! + while next_diff = @results[@ordered.first] + next_diff.log(@verbose) + @ordered.shift end end - # Perform the comparison + # Perform the comparison, populate @results and return the number of failing + # paths (paths with non-zero diff). def run - # Map of path -> Result object, queue_read sets callbacks to populate this + # Map of path -> Result object, populated by process_results @results = {} + @ordered = @config.paths.dup - hydra = Typhoeus::Hydra.new(max_concurrency: 3) - @config.paths.each { |path| queue_read(hydra, path) } - hydra.run + unless @cache.read_tags.empty? + SiteDiff.log("Using sites from cache: " + + @cache.read_tags.sort.join(', ')) + end + fetcher = Fetch.new(@cache, @config.paths, + :before => before, :after => after) + fetcher.run(&self.method(:process_results)) + # Order by original path order @results = @config.paths.map { |p| @results[p] } + return results.map{ |r| r unless r.success? }.compact.length end # Dump results to disk - def dump(dir, report_before, report_after, failing_paths) + def dump(dir, report_before, report_after) report_before ||= before report_after ||= after - FileUtils.mkdir_p(dir) + dir = Pathname.new(dir) + dir.mkpath unless dir.directory? # store diffs of each failing case, first wipe out existing diffs - diff_dir = File.join(dir, DIFFS_DIR) - FileUtils.rm_rf(diff_dir) + diff_dir = dir + DIFFS_DIR + diff_dir.rmtree if diff_dir.exist? results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE } - SiteDiff::log "All diff files were dumped inside #{dir}" + SiteDiff::log "All diff files were dumped inside #{dir.expand_path}" # store failing paths - SiteDiff::log "Writing failures to #{failing_paths}" - File.open(failing_paths, 'w') do |f| + failures = dir + FAILURES_FILE + SiteDiff::log "Writing failures to #{failures.expand_path}" + failures.open('w') do |f| results.each { |r| f.puts r.path unless r.success? } end # create report of results - report = Diff::generate_html_report(results, report_before, report_after) - File.open(File.join(dir, "/report.html") , 'w') { |f| f.write(report) } + report = Diff::generate_html_report(results, report_before, report_after, + @cache) + dir.+(REPORT_FILE).open('w') { |f| f.write(report) } + + # serve some settings + settings = { 'before' => report_before, 'after' => report_after, + 'cached' => @cache.read_tags.map { |t| t.to_s } } + dir.+(SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) } end end