Sha256: 561dbb316f211a3f8d1944d99b3afc8ce398012e502da56395f34b1e953a33a7

Contents?: true

Size: 1.32 KB

Versions: 5

Compression:

Stored size: 1.32 KB

Contents

#!/usr/bin/env ruby

require_relative '../lib/MESH'
require 'cgi'
require 'net/http'
require 'nokogiri'
require 'uri'

# The MeSH tree whose headings are enumerated by the loop below.
# MESH::Tree is provided by ../lib/MESH; presumably it loads the heading
# data when constructed -- confirm against the library.
mesh_tree = MESH::Tree.new

# Builds the slug used in a Wikipedia article URL from an entry term.
#
# The term is lower-cased, every whitespace character is replaced by an
# underscore, the first character is upper-cased, and the result is
# percent-encoded with CGI.escape so it can be interpolated into a URL.
#
# @param name [String] entry term, e.g. "Heart Attack"
# @return [String] URL-safe slug, e.g. "Heart_attack"
def sluggify(name)
  underscored = name.downcase.gsub(/\s/, '_')
  CGI.escape(underscored.capitalize)
end

# For every MeSH heading, look up each of its entry terms on English
# Wikipedia, tally which canonical article each term resolves to, and print
# the article(s) that the most terms agree on.
#
# One record is printed per heading with at least one hit, followed by a
# blank line:
#   UI = <unique id>
#   MH = <original heading>
#   ENTRIES = <entry terms joined with ' -- '>
#   WK = <number of agreeing terms>; <canonical article URL>
#   WI = <src of the article's first infobox image>   (only when present)
mesh_tree.each do |heading|
  hits = Hash.new(0) # canonical article URL => number of entry terms resolving to it
  images = {}        # canonical article URL => src of its first infobox image

  heading.entries.each do |entry|
    # Wikipedia answers plain-HTTP requests with a redirect to HTTPS, and
    # Net::HTTP.get_response does not follow redirects, so request HTTPS
    # directly; otherwise no lookup ever returns 200.
    uri = URI.parse("https://en.wikipedia.org/wiki/#{sluggify(entry)}")

    response =
      begin
        Net::HTTP.get_response(uri)
      rescue StandardError => e
        # A single failed lookup (timeout, DNS failure, reset connection)
        # should not abort the crawl over the whole tree; report and go on.
        warn "Skipping #{uri}: #{e.class}: #{e.message}"
        nil
      end

    if response && response.code == '200'
      doc = Nokogiri::HTML(response.body)
      canonical = doc.xpath('/html/head/link[@rel="canonical"]/@href').text

      # A page without a canonical link cannot be identified, so it must not
      # collect votes under an empty key.
      unless canonical.empty?
        hits[canonical] += 1

        # Select by CSS class rather than by exact attribute value so that
        # infoboxes carrying additional classes are matched as well.
        img = doc.at_css('table.infobox img')
        src = img && img['src']
        images[canonical] ||= src unless src.nil? || src.empty?
      end
    end

    sleep 0.1 # brief pause between consecutive requests
  end

  # Nothing resolved for this heading: print no record.
  next if hits.empty?

  # Keep every article that shares the highest vote count, in the order the
  # articles were first seen.
  score = hits.values.max
  winners = hits.select { |_, votes| votes == score }.keys

  puts "UI = #{heading.unique_id}"
  puts "MH = #{heading.original_heading}"
  puts "ENTRIES = #{heading.entries.join(' -- ')}"
  winners.each do |article|
    puts "WK = #{score}; #{article}"
    puts "WI = #{images[article]}" if images.key?(article)
  end
  puts ''
  # Flush after each record so progress is visible when output is piped.
  STDOUT.flush
end

Version data entries

5 entries across 5 versions & 1 rubygem

Version Path
mesh-medical-subject-headings-3.0.0 bin/match_wikipedia
mesh-medical-subject-headings-2.3.0 bin/match_wikipedia
mesh-medical-subject-headings-2.2.1 bin/match_wikipedia
mesh-medical-subject-headings-2.2.0 bin/match_wikipedia
mesh-medical-subject-headings-2.1.0 bin/match_wikipedia