lib/flay.rb in flay-1.3.0 vs lib/flay.rb in flay-1.4.0
- old
+ new
@@ -7,11 +7,11 @@
require 'rubygems'
require 'sexp_processor'
require 'ruby_parser'
class Flay
- VERSION = '1.3.0'
+ VERSION = '1.4.0'
def self.default_options
{
:diff => false,
:mass => 16,
@@ -168,11 +168,11 @@
def process_sexp pt
pt.deep_each do |node|
next unless node.any? { |sub| Sexp === sub }
next if node.mass < self.mass_threshold
- self.hashes[node.fuzzy_hash] << node
+ self.hashes[node.structural_hash] << node
end
end
def prune
# prune trees that aren't duped at all, or are too small
@@ -180,11 +180,11 @@
# extract all subtree hashes from all nodes
all_hashes = {}
self.hashes.values.each do |nodes|
nodes.each do |node|
- node.all_subhashes.each do |h|
+ node.all_structural_subhashes.each do |h|
all_hashes[h] = true
end
end
end
@@ -290,54 +290,18 @@
class String
attr_accessor :group
end
class Sexp
- def mass
- @mass ||= self.structure.flatten.size
+ def structural_hash
+ @structural_hash ||= self.structure.hash
end
- alias :uncached_structure :structure
- def structure
- @structure ||= self.uncached_structure
- end
-
- def similarity o
- l, s, r = self.compare_to o
- (2.0 * s) / (2.0 * s + l + r)
- end
-
- def compare_to they
- l = s = r = 0
-
- l_sexp, l_lits = self.partition { |o| Sexp === o }
- r_sexp, r_lits = they.partition { |o| Sexp === o }
-
- l += (l_lits - r_lits).size
- s += (l_lits & r_lits).size
- r += (r_lits - l_lits).size
-
- # TODO: I think this is wrong, since it isn't positional. What to do?
- l_sexp.zip(r_sexp).each do |l_sub, r_sub|
- next unless l_sub && r_sub # HACK
- l2, s2, r2 = l_sub.compare_to r_sub
- l += l2
- s += s2
- r += r2
- end
-
- return l, s, r
- end
-
- def fuzzy_hash
- @fuzzy_hash ||= self.structure.hash
- end
-
- def all_subhashes
+ def all_structural_subhashes
hashes = []
self.deep_each do |node|
- hashes << node.fuzzy_hash
+ hashes << node.structural_hash
end
hashes
end
def deep_each(&block)
@@ -350,37 +314,8 @@
def each_sexp
self.each do |sexp|
next unless Sexp === sexp
yield sexp
- end
- end
-end
-
-class Array
- def intersection other
- intersection, start = [], 0
- other_size = other.length
- self.each_with_index do |m, i|
- (start...other_size).each do |j|
- n = other.at j
- if m == n then
- intersection << m
- start = j + 1
- break
- end
- end
- end
- intersection
- end
-
- def triangle # TODO: use?
- max = self.size
- (0...max).each do |i|
- o1 = at(i)
- (i+1...max).each do |j|
- o2 = at(j)
- yield o1, o2
- end
end
end
end