#!/usr/bin/env ruby
require 'RMagick'
require 'progressbar'
require 'base64'
class DupeMagick
attr_accessor :source_vector, :target_vector, :source_cube, :target_cube, :distance
# Compares two image files and reports how alike they are.
#
# Both files are run through process_image with the same params, exported to
# pixel vectors and binned into per-channel cubes. The vectors, cubes and the
# resulting distance are stored in the matching instance variables.
#
# source, target - values handed to process_image
# params         - options hash passed through to process_image
#
# Prints a one-line summary and returns the distance truncated to an Integer.
def compare_images(source, target, params)
  # Process both files before deriving anything from either of them.
  processed = [source, target].map { |file| process_image(file, params) }

  @source_vector = create_vector_from_image(processed.first)
  @source_cube = create_cube_from_vector(@source_vector)
  @target_vector = create_vector_from_image(processed.last)
  @target_cube = create_cube_from_vector(@target_vector)
  @distance = calculate_euclidian_distance(@source_cube, @target_cube)

  verdict = euclidian_plain_language(@distance)
  score = @distance.to_i
  puts "Images are #{verdict}, score: #{score}"
  score
end
# Scores every file matched by a glob against one source image.
#
# The source and each candidate are reduced to an 8x8 thumbnail (geometry
# '8x8!') before being turned into a vector and a cube. The instance variables
# for the target side and the distance are overwritten on every iteration.
#
# source_file - value handed to process_image for the reference image
# target_path - glob pattern expanded with Dir[]
#
# Returns whatever write_results returns for the collected
# { file name => distance } hash.
def find_duplicates(source_file, target_path)
  thumbnail = {:geometry => '8x8!'}

  @source_vector = create_vector_from_image(process_image(source_file, thumbnail))
  @source_cube = create_cube_from_vector(@source_vector)

  candidates = Dir[target_path]
  bar = do_make_progress_bar("#{candidates.size} images", candidates.size)

  scores = candidates.each_with_object({}) do |candidate, found|
    @target_vector = create_vector_from_image(process_image(candidate, thumbnail))
    @target_cube = create_cube_from_vector(@target_vector)
    @distance = calculate_euclidian_distance(@source_cube, @target_cube)
    found[candidate] = @distance
    bar.inc
  end

  bar.finish
  write_results(source_file, scores)
end
# Turns an object into Base64 text (Marshal dump, then Base64 encode), e.g.
# for storing image data as a blob in mysql etc.
#
# obj - any object Marshal can dump
#
# Returns the encoded String.
def serialize(obj)
  marshalled = Marshal.dump(obj)
  Base64.encode64(marshalled)
end
# Restores an object from Base64 text holding a Marshal dump, e.g. when
# retrieving image data from a blob in mysql etc.
#
# obj - Base64 String wrapping a Marshal dump
#
# Returns the reconstructed object.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects; only pass data
# this application wrote itself, never untrusted input.
def deserialize(obj)
  # Undo the steps in reverse: strip the Base64 layer first, then unmarshal
  # the raw bytes.
  Marshal.load(Base64.decode64(obj))
end
private
# Reads a file with Magick::Image.read and keeps only the first element of
# the result.
#
# file - argument passed straight to Magick::Image.read
def read_image(file)
  frames = Magick::Image.read(file)
  frames.first
end
# Creates the progress bar used while scanning files.
#
# title - label given to ProgressBar.new
# total - total count given to ProgressBar.new
#
# Returns the new ProgressBar.
def do_make_progress_bar(title, total)
  bar = ProgressBar.new(title, total)
  bar
end
# Loads an image file and prepares it for comparison.
#
# Steps: read, normalize, crop away a 10% border on every side, optionally
# quantize and/or blur, then resize to params[:geometry].
#
# image  - value handed to read_image
# params - options hash:
#          :geometry    geometry string given to change_geometry (e.g. '8x8!')
#          :quantize    when truthy, quantize to 32 colours in RGB colourspace
#          :blur        when truthy, apply gaussian_blur
#          :blur_radius second argument of gaussian_blur; 1.0 when omitted
#
# Returns the value of the change_geometry block, i.e. the result of resize!.
def process_image(image, params)
  # read and normalize image
  image = read_image(image).normalize

  # strip off 10% border: excerpt takes (x, y, width, height), so the kept
  # area is the full size minus the border on BOTH sides, not 90% of it.
  border_x = image.columns * 10 / 100
  border_y = image.rows * 10 / 100
  image = image.excerpt(border_x, border_y,
                        image.columns - 2 * border_x, image.rows - 2 * border_y)

  # optionally quantize image to 32 colours
  image = image.quantize(32, Magick::RGBColorspace) if params[:quantize]

  # optionally blur image; fall back to 1.0 so :blur without :blur_radius
  # does not pass nil to gaussian_blur
  image = image.gaussian_blur(0.0, params[:blur_radius] || 1.0) if params[:blur]

  # change geometry of image
  image.change_geometry(params[:geometry]) { |cols, rows, img| img.resize!(cols, rows) }
end
# Exports the whole image (from 0,0 across its full columns x rows) with the
# "RGB" channel map.
#
# image - object responding to columns, rows and export_pixels
#
# Returns whatever export_pixels returns.
def create_vector_from_image(image)
  width = image.columns
  height = image.rows
  image.export_pixels(0, 0, width, height, "RGB")
end
# Builds one histogram per colour channel from a flat pixel vector.
#
# The vector is read as interleaved samples (position 0 is red, 1 is green,
# 2 is blue, 3 is red again, ...). Each sample is mapped to a bin with
# which_bin and counted under its channel.
#
# vector - flat Array of channel samples; it is only read, never modified
#
# Returns { :r => {bin => count}, :g => {...}, :b => {...} }; every inner hash
# defaults to 0 for bins that were never hit.
def create_cube_from_vector(vector)
  channels = [:r, :g, :b]
  cube = {}
  channels.each { |channel| cube[channel] = Hash.new(0) }

  # Walk the vector once by index so the caller's array is left untouched and
  # every sample is attributed to the channel at its own position.
  vector.each_with_index do |sample, position|
    cube[channels[position % 3]][which_bin(sample)] += 1
  end

  cube
end
# Maps a channel value to one of eight equal-width bins.
#
# Bin 1 covers 0..8191, bin 2 covers 8192..16383, and so on up to bin 8,
# which covers 57344..65535.
#
# channel - the value to classify
#
# Returns the bin number (1..8), or nil when no bin range matches.
def which_bin(channel)
  width = 8192
  (1..8).find do |bin|
    lower = (bin - 1) * width
    (lower..(lower + width - 1)) === channel
  end
end
# Picks every mod-th element of an array, starting at a given offset.
#
# arr    - source Array; it is only read, never modified
# mod    - step between picked elements
# offset - index of the first picked element (default 0)
#
# Returns a new Array with the elements at offset, offset + mod, ...
def array_mod(arr, mod, offset = 0)
  picked = []
  # drop returns a copy without the leading elements; shift would remove them
  # from the caller's array and skew every later call on the same array.
  arr.drop(offset).each_with_index do |val, idx|
    picked << val if idx % mod == 0
  end
  picked
end
# Distance between two cubes, based on the squared per-bin differences of the
# :r, :g and :b histograms over bins 1..8.
#
# source_cube, target_cube - hashes of the form
#                            { :r => {bin => count}, :g => {...}, :b => {...} }
#
# Returns a Float: sqrt(64 * sum of squared differences). The factor 64 is
# kept on purpose: this method used to add every term once per combination of
# the other two channels' bins (8 * 8), and existing scores and thresholds are
# calibrated to that scale. For integer counts the result is identical.
def calculate_euclidian_distance(source_cube, target_cube)
  squared = [:r, :g, :b].inject(0) do |total, channel|
    (1..8).inject(total) do |acc, bin|
      acc + (target_cube[channel][bin] - source_cube[channel][bin])**2
    end
  end
  Math.sqrt(64 * squared)
end
# Translates a distance into a plain-language verdict.
#
# distance - Integer or Float score
#
# Returns "identical" for exactly 0, "similar" for anything above 0 and below
# 51, "possibly similar" from 51 up to (but not including) 151, and
# "different" for everything else. Whole numbers keep their historic bands
# (1..50 and 51..150).
def euclidian_plain_language(distance)
  case distance
  when 0 then "identical"
  # Half-open ranges leave no gaps, so a fractional score such as 50.6 lands
  # in the band of its integer part instead of falling through to "different".
  when 0...51 then "similar"
  when 51...151 then "possibly similar"
  else "different"
  end
end
def write_results(source, results)
open("results.html", 'w') do |f|
f.puts "Source