lib/MESH/tree.rb in mesh-medical-subject-headings-2.0.6 vs lib/MESH/tree.rb in mesh-medical-subject-headings-2.1.0

- old
+ new

@@ -22,11 +22,11 @@ current_heading.default_locale = @@default_locale file.each_line do |line| case - when matches = line.match(/^\*NEWRECORD$/) + when line.match(/^\*NEWRECORD$/) unless current_heading.unique_id.nil? current_heading.entries.sort! @headings << current_heading @by_unique_id[current_heading.unique_id] = current_heading @by_original_heading[current_heading.original_heading] = current_heading @@ -54,10 +54,13 @@ current_heading.set_summary(matches[1]) when matches = line.match(/^DC = (.*)/) current_heading.descriptor_class = @@descriptor_classes[matches[1].to_i] + when matches = line.match(/^ST = (.*)/) + current_heading.semantic_types << MESH::SemanticTypes[matches[1]] + when matches = line.match(/^MH = (.*)/) mh = matches[1] current_heading.set_original_heading(mh) current_heading.entries << mh unless current_heading.entries.include? mh librarian_parts = mh.match(/(.*), (.*)/) @@ -105,11 +108,11 @@ unique_id = nil file.each_line do |line| case - when matches = line.match(/^\*NEWRECORD$/) + when line.match(/^\*NEWRECORD$/) unless unique_id.nil? entries.sort! entries.uniq! if heading = find(unique_id) heading.set_original_heading(original_heading, locale) unless original_heading.nil? @@ -144,9 +147,48 @@ end end @locales << locale end + + def load_wikipedia + return if @wikipedia_loaded + filename = File.expand_path("../../../data/mesh_data_2014/d2014.wikipedia.bin.gz", __FILE__) + gzipped_file = File.open(filename) + file = Zlib::GzipReader.new(gzipped_file) + + unique_id = nil + wikipedia_links = [] + file.each_line do |line| + + case + + when line.match(/^\*NEWRECORD$/) + unless unique_id.nil? + if heading = find(unique_id) + wikipedia_links.each do |wl| + wl[:score] = (wl[:score].to_f / heading.entries.length.to_f).round(2) + end + heading.wikipedia_links = wikipedia_links + end + + wikipedia_links = [] + unique_id = nil + end + + when matches = line.match(/^UI = (.*)/) + unique_id = matches[1] + + when matches = line.match(/^WK = (.*)/) + hash = JSON.parse(matches[1], symbolize_names: true) + wikipedia_links << hash + + end + + end + @wikipedia_loaded = true + end + def linkify_summaries &block @headings.each do |h| h.linkify_summary &block end \ No newline at end of file