Sha256: af514a30af46cb566d314e77af24b90e793d6a09c40ef09823b0397cd16963e3

Contents?: true

Size: 1.58 KB

Versions: 2

Compression:

Stored size: 1.58 KB

Contents

require 'sitediff/sanitize/regexp'
require 'pathname'
require 'set'

class SiteDiff
# Find appropriate rules for a given site
class Rules
  def initialize(config, disabled = false)
    @disabled = disabled
    @config = config
    find_sanitization_candidates
    @rules = Hash.new { |h, k| h[k] = Set.new }
  end

  def find_sanitization_candidates
    @candidates = Set.new

    rules_dir = Pathname.new(__FILE__).dirname + 'files' + 'rules'
    rules_dir.children.each do |f|
      next unless f.file? && f.extname == '.yaml'
      conf = YAML.load_file(f)
      @candidates.merge(conf['sanitization'])
    end
  end

  def handle_page(tag, html, doc)
    found = find_rules(html, doc)
    @rules[tag].merge(found)
  end

  # Yield a set of rules that seem reasonable for this HTML
  # assumption: the YAML file is a list of regexp rules only
  def find_rules(html, doc)
    rules = []

    return @candidates.select do |rule|
      re = SiteDiff::Sanitizer::Regexp.create(rule)
      re.applies?(html, doc)
    end
  end

  # Find all rules from all rulesets that apply for all pages
  def add_config
    have_both = @rules.include?(:before)

    r1, r2 = *@rules.values_at(:before, :after)
    if have_both
      add_section('before', r1 - r2)
      add_section('after', r2 - r1)
      add_section(nil, r1 & r2)
    else
      add_section(nil, r2)
    end
  end

  def add_section(name, rules)
    return if rules.empty?
    conf = name ? @config[name] : @config
    if @disabled
      rules.each { |r| r['disabled'] = true }
    end
    conf['sanitization'] = rules.to_a.sort_by { |r| r['title'] }
  end
end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
sitediff-0.0.3 lib/sitediff/rules.rb
sitediff-0.0.2 lib/sitediff/rules.rb