lib/flay.rb in flay-1.2.1 vs lib/flay.rb in flay-1.3.0

- old
+ new

@@ -6,20 +6,19 @@ require 'optparse' require 'rubygems' require 'sexp_processor' require 'ruby_parser' -abort "update rubygems to >= 1.3.1" unless Gem.respond_to? :find_files - class Flay - VERSION = '1.2.1' + VERSION = '1.3.0' def self.default_options { - :fuzzy => false, - :verbose => false, + :diff => false, :mass => 16, + :summary => false, + :verbose => false, } end def self.parse_options options = self.default_options @@ -35,28 +34,42 @@ opts.on('-h', '--help', 'Display this help.') do puts opts exit end - opts.on('-f', '--fuzzy', "Attempt to do fuzzy similarities. (SLOW)") do - options[:fuzzy] = true + opts.on('-f', '--fuzzy', "DEAD: fuzzy similarities.") do + abort "--fuzzy is no longer supported. Sorry. It sucked." end opts.on('-m', '--mass MASS', Integer, "Sets mass threshold") do |m| options[:mass] = m.to_i end - opts.on('-v', '--verbose', "Verbose. Display N-Way diff for ruby.") do + opts.on('-v', '--verbose', "Verbose. Show progress processing files.") do options[:verbose] = true end + opts.on('-d', '--diff', "Diff Mode. Display N-Way diff for ruby.") do + options[:diff] = true + end + + opts.on('-s', '--summary', "Summarize. Show flay score per file only.") do + options[:summary] = true + end + extensions = ['rb'] + Flay.load_plugins opts.separator "" opts.separator "Known extensions: #{extensions.join(', ')}" - end.parse! + begin + opts.parse! + rescue => e + abort "#{e}\n\n#{opts}" + end + end + options end def self.expand_dirs_to_files *dirs extensions = ['rb'] + Flay.load_plugins @@ -83,10 +96,12 @@ end @@plugins = plugins.map { |f| File.basename(f, '.rb').sub(/^flay_/, '') } end @@plugins + rescue + # ignore end attr_accessor :mass_threshold, :total, :identical, :masses attr_reader :hashes, :option @@ -97,41 +112,43 @@ self.identical = {} self.masses = {} self.total = 0 self.mass_threshold = @option[:mass] - require 'ruby2ruby' if @option[:verbose] + require 'ruby2ruby' if @option[:diff] end def process(*files) # TODO: rename from process - should act as SexpProcessor files.each do |file| - warn "Processing #{file}" + warn "Processing #{file}" if option[:verbose] ext = File.extname(file).sub(/^\./, '') ext = "rb" if ext.nil? || ext.empty? msg = "process_#{ext}" unless respond_to? msg then warn " Unknown file type: #{ext}, defaulting to ruby" msg = "process_rb" end - sexp = begin - send msg, file - rescue => e - warn " #{e.message.strip}" - warn " skipping #{file}" - nil - end + begin + sexp = begin + send msg, file + rescue => e + warn " #{e.message.strip}" + warn " skipping #{file}" + nil + end - next unless sexp + next unless sexp - process_sexp sexp + process_sexp sexp + rescue SyntaxError => e + warn " skipping #{file}: #{e.message}" + end end - process_fuzzy_similarities if option[:fuzzy] - analyze end def analyze self.prune @@ -155,46 +172,10 @@ self.hashes[node.fuzzy_hash] << node end end - def process_fuzzy_similarities - all_hashes, detected = {}, {} - - self.hashes.values.each do |nodes| - nodes.each do |node| - next if node.mass > 4 * self.mass_threshold - # TODO: try out with fuzzy_hash - # all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] * s.mass }.flatten - all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] }.flatten - end - end - - # warn "looking for copy/paste/edit code across #{all_hashes.size} nodes" - - all_hashes = all_hashes.to_a - all_hashes.each_with_index do |(s1, h1), i| - similar = [s1] - all_hashes[i+1..-1].each do |(s2, h2)| - next if detected[h2] - intersection = h1.intersection h2 - max = [h1.size, h2.size].max - if intersection.size >= max * 0.60 then - similarity = s1.similarity(s2) - if similarity > 0.60 then - similar << s2 - detected[h2] = true - else - p [similarity, s1, s2] - end - end - end - - self.hashes[similar.first.hash].push(*similar) if similar.size > 1 - end - end - def prune # prune trees that aren't duped at all, or are too small self.hashes.delete_if { |_,nodes| nodes.size == 1 } # extract all subtree hashes from all nodes @@ -240,14 +221,37 @@ end } groups.flatten.join("\n") end + def summary + score = Hash.new 0 + + masses.each do |hash, mass| + sexps = hashes[hash] + mass_per_file = mass.to_f / sexps.size + sexps.each do |sexp| + score[sexp.file] += mass_per_file + end + end + + score + end + def report prune = nil puts "Total score (lower is better) = #{self.total}" puts + if option[:summary] then + + self.summary.sort_by { |_,v| -v }.each do |file, score| + puts "%8.2f: %s" % [score, file] + end + + return + end + count = 0 masses.sort_by { |h,m| [-m, hashes[h].first.file] }.each do |hash, mass| nodes = hashes[hash] next unless nodes.first.first == prune if prune puts @@ -263,19 +267,19 @@ count += 1 puts "%d) %s code found in %p (mass%s = %d)" % [count, match, node.first, bonus, mass] - nodes.each_with_index do |node, i| - if option[:verbose] then + nodes.each_with_index do |x, i| + if option[:diff] then c = (?A + i).chr - puts " #{c}: #{node.file}:#{node.line}" + puts " #{c}: #{x.file}:#{x.line}" else - puts " #{node.file}:#{node.line}" + puts " #{x.file}:#{x.line}" end end - if option[:verbose] then + if option[:diff] then puts r2r = Ruby2Ruby.new puts n_way_diff(*nodes.map { |s| r2r.process(s.deep_clone) }) end end