lib/flay.rb in flay-1.2.1 vs lib/flay.rb in flay-1.3.0
- old
+ new
@@ -6,20 +6,19 @@
require 'optparse'
require 'rubygems'
require 'sexp_processor'
require 'ruby_parser'
-abort "update rubygems to >= 1.3.1" unless Gem.respond_to? :find_files
-
class Flay
- VERSION = '1.2.1'
+ VERSION = '1.3.0'
def self.default_options
{
- :fuzzy => false,
- :verbose => false,
+ :diff => false,
:mass => 16,
+ :summary => false,
+ :verbose => false,
}
end
def self.parse_options
options = self.default_options
@@ -35,28 +34,42 @@
opts.on('-h', '--help', 'Display this help.') do
puts opts
exit
end
- opts.on('-f', '--fuzzy', "Attempt to do fuzzy similarities. (SLOW)") do
- options[:fuzzy] = true
+ opts.on('-f', '--fuzzy', "DEAD: fuzzy similarities.") do
+ abort "--fuzzy is no longer supported. Sorry. It sucked."
end
opts.on('-m', '--mass MASS', Integer, "Sets mass threshold") do |m|
options[:mass] = m.to_i
end
- opts.on('-v', '--verbose', "Verbose. Display N-Way diff for ruby.") do
+ opts.on('-v', '--verbose', "Verbose. Show progress processing files.") do
options[:verbose] = true
end
+ opts.on('-d', '--diff', "Diff Mode. Display N-Way diff for ruby.") do
+ options[:diff] = true
+ end
+
+ opts.on('-s', '--summary', "Summarize. Show flay score per file only.") do
+ options[:summary] = true
+ end
+
extensions = ['rb'] + Flay.load_plugins
opts.separator ""
opts.separator "Known extensions: #{extensions.join(', ')}"
- end.parse!
+ begin
+ opts.parse!
+ rescue => e
+ abort "#{e}\n\n#{opts}"
+ end
+ end
+
options
end
def self.expand_dirs_to_files *dirs
extensions = ['rb'] + Flay.load_plugins
@@ -83,10 +96,12 @@
end
@@plugins = plugins.map { |f| File.basename(f, '.rb').sub(/^flay_/, '') }
end
@@plugins
+ rescue
+ # ignore
end
attr_accessor :mass_threshold, :total, :identical, :masses
attr_reader :hashes, :option
@@ -97,41 +112,43 @@
self.identical = {}
self.masses = {}
self.total = 0
self.mass_threshold = @option[:mass]
- require 'ruby2ruby' if @option[:verbose]
+ require 'ruby2ruby' if @option[:diff]
end
def process(*files) # TODO: rename from process - should act as SexpProcessor
files.each do |file|
- warn "Processing #{file}"
+ warn "Processing #{file}" if option[:verbose]
ext = File.extname(file).sub(/^\./, '')
ext = "rb" if ext.nil? || ext.empty?
msg = "process_#{ext}"
unless respond_to? msg then
warn " Unknown file type: #{ext}, defaulting to ruby"
msg = "process_rb"
end
- sexp = begin
- send msg, file
- rescue => e
- warn " #{e.message.strip}"
- warn " skipping #{file}"
- nil
- end
+ begin
+ sexp = begin
+ send msg, file
+ rescue => e
+ warn " #{e.message.strip}"
+ warn " skipping #{file}"
+ nil
+ end
- next unless sexp
+ next unless sexp
- process_sexp sexp
+ process_sexp sexp
+ rescue SyntaxError => e
+ warn " skipping #{file}: #{e.message}"
+ end
end
- process_fuzzy_similarities if option[:fuzzy]
-
analyze
end
def analyze
self.prune
@@ -155,46 +172,10 @@
self.hashes[node.fuzzy_hash] << node
end
end
- def process_fuzzy_similarities
- all_hashes, detected = {}, {}
-
- self.hashes.values.each do |nodes|
- nodes.each do |node|
- next if node.mass > 4 * self.mass_threshold
- # TODO: try out with fuzzy_hash
- # all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] * s.mass }.flatten
- all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] }.flatten
- end
- end
-
- # warn "looking for copy/paste/edit code across #{all_hashes.size} nodes"
-
- all_hashes = all_hashes.to_a
- all_hashes.each_with_index do |(s1, h1), i|
- similar = [s1]
- all_hashes[i+1..-1].each do |(s2, h2)|
- next if detected[h2]
- intersection = h1.intersection h2
- max = [h1.size, h2.size].max
- if intersection.size >= max * 0.60 then
- similarity = s1.similarity(s2)
- if similarity > 0.60 then
- similar << s2
- detected[h2] = true
- else
- p [similarity, s1, s2]
- end
- end
- end
-
- self.hashes[similar.first.hash].push(*similar) if similar.size > 1
- end
- end
-
def prune
# prune trees that aren't duped at all, or are too small
self.hashes.delete_if { |_,nodes| nodes.size == 1 }
# extract all subtree hashes from all nodes
@@ -240,14 +221,37 @@
end
}
groups.flatten.join("\n")
end
+ def summary
+ score = Hash.new 0
+
+ masses.each do |hash, mass|
+ sexps = hashes[hash]
+ mass_per_file = mass.to_f / sexps.size
+ sexps.each do |sexp|
+ score[sexp.file] += mass_per_file
+ end
+ end
+
+ score
+ end
+
def report prune = nil
puts "Total score (lower is better) = #{self.total}"
puts
+ if option[:summary] then
+
+ self.summary.sort_by { |_,v| -v }.each do |file, score|
+ puts "%8.2f: %s" % [score, file]
+ end
+
+ return
+ end
+
count = 0
masses.sort_by { |h,m| [-m, hashes[h].first.file] }.each do |hash, mass|
nodes = hashes[hash]
next unless nodes.first.first == prune if prune
puts
@@ -263,19 +267,19 @@
count += 1
puts "%d) %s code found in %p (mass%s = %d)" %
[count, match, node.first, bonus, mass]
- nodes.each_with_index do |node, i|
- if option[:verbose] then
+ nodes.each_with_index do |x, i|
+ if option[:diff] then
c = (?A + i).chr
- puts " #{c}: #{node.file}:#{node.line}"
+ puts " #{c}: #{x.file}:#{x.line}"
else
- puts " #{node.file}:#{node.line}"
+ puts " #{x.file}:#{x.line}"
end
end
- if option[:verbose] then
+ if option[:diff] then
puts
r2r = Ruby2Ruby.new
puts n_way_diff(*nodes.map { |s| r2r.process(s.deep_clone) })
end
end