#!/usr/bin/env ruby
# frozen_string_literal: true

# Benchmark script: times MESH::Tree#match_in_text against the title,
# description and content fields of ./example.json, then prints the
# individual samples and their average for each field.

require 'json'
require 'MESH'

# Adds a human-readable duration formatter to every number, treating the
# receiver as a count of seconds.
class Numeric
  # Formats the receiver (seconds) using its two largest applicable units.
  #
  # @return [String, nil] e.g. "2 days and 3 hours", "5 minutes and 7 seconds"
  #   or "1.235 seconds"; nil when the receiver is negative, because none of
  #   the branches below match.
  def duration
    # divmod(1) splits whole seconds from the fractional remainder.
    secs, fraction = divmod(1)
    mins = secs / 60
    hours = mins / 60
    days = hours / 24

    if days > 0
      "#{days} days and #{hours % 24} hours"
    elsif hours > 0
      "#{hours} hours and #{mins % 60} minutes"
    elsif mins > 0
      "#{mins} minutes and #{secs % 60} seconds"
    elsif secs >= 0
      "#{(fraction + secs).round(3)} seconds"
    end
  end
end

# Runs the given block once and returns how long it took.
#
# @param name [String] label for the measurement; only used by the
#   commented-out progress output below.
# @yield the code to time; its return value is discarded.
# @return [Float] elapsed seconds.
def time_this(name, &block)
  # print "#{name}"
  # STDOUT.flush
  # A monotonic clock is used so wall-clock adjustments cannot skew the result.
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield
  finish = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  # puts "\t#{(finish - start).duration}"
  finish - start
end

mesh_tree = MESH::Tree.new
mesh_tree.load_translation(:en_gb)
mesh_tree.load_wikipedia

# File.read opens, reads and closes the file in one call (no leaked handle).
json_str = File.read('./example.json')
extracted = JSON.parse(json_str)

# Each key lazily gets its own array of timing samples.
timings = Hash.new { |h, k| h[k] = [] }

# Eleven samples per field, matching the original (0..10) range.
11.times do
  time = time_this('Matching in title') { mesh_tree.match_in_text(extracted['title']) }
  timings[:title_headings] << time

  time = time_this('Matching in description') { mesh_tree.match_in_text(extracted['description']) }
  timings[:description_headings] << time

  time = time_this('Matching in content') { mesh_tree.match_in_text(extracted['content']) }
  timings[:content_headings] << time
end

# (0..10).each do |i|
#   time = time_this('Matching in title') { mesh_tree.match_in_text_2(extracted['title']) }
#   timings[:title_entries_2] << time
#   time = time_this('Matching in description') { mesh_tree.match_in_text_2(extracted['description']) }
#   timings[:description_entries_2] << time
#   time = time_this('Matching in content') { mesh_tree.match_in_text_2(extracted['content']) }
#   timings[:content_entries_2] << time
# end

# For every field: one tab-separated line of samples, then one line with the mean.
timings.each do |k, v|
  t = v.map { |e| e.round(3) }.join("\t")
  puts "#{k}\t#{t}"

  avg = v.inject(:+) / v.size
  puts "#{k}\t#{avg.round(3)}"
end

# classifier = MESH::Classifier.new()
# classification = time_this('Classifying from matches') { classifier.classify([
#   {weight: 10.0, matches: title_headings},
#   {weight: 5.0, matches: description_headings},
#   {weight: 1.0, matches: content_headings}
# ]) }