Sha256: 561dbb316f211a3f8d1944d99b3afc8ce398012e502da56395f34b1e953a33a7
Contents?: true
Size: 1.32 KB
Versions: 5
Compression:
Stored size: 1.32 KB
Contents
#!/usr/bin/env ruby
#
# For every heading yielded by MESH::Tree#each, looks up each of the heading's
# entry terms on English Wikipedia and prints the article URL(s) that the
# largest number of entry terms resolved to, together with the first infobox
# image of each article when one was found.
#
# Output, one record per heading that resolved to at least one article:
#   UI = <heading.unique_id>
#   MH = <heading.original_heading>
#   ENTRIES = <heading.entries joined by ' -- '>
#   WK = <score>; <canonical article URL>   (one line per top-scoring article)
#   WI = <image src>                        (only when an image was found)
#   <blank line>

require_relative '../lib/MESH'
require 'cgi' # CGI.escape is used by sluggify
require 'net/http'
require 'nokogiri'
require 'uri'

# Network-level failures that should skip a single lookup instead of aborting
# the whole run.
NETWORK_ERRORS = [
  SocketError, SystemCallError, IOError, Timeout::Error,
  OpenSSL::SSL::SSLError, Net::ProtocolError, Net::HTTPBadResponse
].freeze

# Turns an entry term into a URL path segment: lowercased, each whitespace
# character replaced by '_', first character capitalised, then URL-escaped.
def sluggify(name)
  CGI.escape(name.downcase.gsub(/\s/, '_').capitalize)
end

# Fetches the Wikipedia page for +term+.
#
# Returns the parsed Nokogiri document when the server answers 200, otherwise
# nil. Network errors are reported on stderr and also yield nil.
def fetch_article(term)
  # HTTPS is requested directly: Net::HTTP.get_response does not follow
  # redirects, so a redirect from plain HTTP would never produce a '200'.
  uri = URI.parse("https://en.wikipedia.org/wiki/#{sluggify(term)}")
  response = Net::HTTP.get_response(uri)
  Nokogiri::HTML(response.body) if response.code == '200'
rescue *NETWORK_ERRORS => e
  warn "Skipping #{term.inspect}: #{e.class}: #{e.message}"
  nil
end

# Records one fetched article: bumps the hit count of its canonical URL in
# +hits+ and remembers its first infobox image (if any) in +images+.
# Documents without a canonical link are ignored, so that unrelated pages are
# not lumped together under an empty URL.
def record_article(doc, hits, images)
  canonical = doc.xpath('/html/head/link[@rel="canonical"]/@href').text
  return if canonical.empty?

  hits[canonical] += 1
  src = doc.xpath('(//table[@class="infobox"]//img)[1]/@src').text
  images[canonical] ||= src unless src.empty?
end

mesh_tree = MESH::Tree.new

mesh_tree.each do |heading|
  hits = Hash.new(0) # canonical URL => number of entry terms resolving to it
  images = {}        # canonical URL => first infobox image src

  heading.entries.each do |entry|
    doc = fetch_article(entry)
    record_article(doc, hits, images) if doc
    sleep 0.1 # brief pause between consecutive requests
  end

  next if hits.empty?

  # Keep every article that shares the highest hit count, in the order the
  # articles were first seen.
  score = hits.values.max
  best = hits.select { |_, n| n == score }.keys

  puts "UI = #{heading.unique_id}"
  puts "MH = #{heading.original_heading}"
  puts "ENTRIES = #{heading.entries.join(' -- ')}"
  best.each do |url|
    puts "WK = #{score}; #{url}"
    puts "WI = #{images[url]}" if images[url]
  end
  puts ''
  $stdout.flush
end
Version data entries
5 entries across 5 versions & 1 rubygems