lib/MESH/tree.rb in mesh-medical-subject-headings-2.2.0 vs lib/MESH/tree.rb in mesh-medical-subject-headings-2.2.1
- old
+ new
@@ -1,10 +1,9 @@
module MESH
class Tree
- @@descriptor_classes = [:make_array_start_at_1, :topical_descriptor, :publication_type, :check_tag, :geographic_descriptor]
@@default_locale = :en_us
def initialize
@headings = []
@@ -16,86 +15,43 @@
filename = File.expand_path('../../../data/mesh_data_2014/d2014.bin.gz', __FILE__)
gzipped_file = File.open(filename)
file = Zlib::GzipReader.new(gzipped_file)
- current_heading = MESH::Heading.new(self)
- current_heading.default_locale = @@default_locale
+ lines = []
file.each_line do |line|
-
case
-
when line.match(/^\*NEWRECORD$/)
- unless current_heading.unique_id.nil?
- current_heading.entries.sort!
- @headings << current_heading
- @by_unique_id[current_heading.unique_id] = current_heading
- @by_original_heading[current_heading.original_heading] = current_heading
- current_heading.tree_numbers.each do |tree_number|
- raise if @by_tree_number[tree_number]
- @by_tree_number[tree_number] = current_heading
- end
- match_headings = current_heading.entries.map { |e| entry_match_key(e) }.uniq
- match_headings.each do |entry|
- raise "#{@by_entry[entry]} vs #{current_heading} on #{entry}\n\n#{@by_entry[entry].entries}\n\n#{current_heading.entries}" if @by_entry[entry]
- @by_entry[entry] = current_heading
- end
+ unless lines.empty?
+ mh = MESH::Heading.new(self, @@default_locale, lines)
+ add_heading_to_hashes(mh)
+ lines = [line]
end
- current_heading = MESH::Heading.new(self)
- current_heading.default_locale = @@default_locale
-
- when matches = line.match(/^UI = (.*)/)
- current_heading.unique_id = matches[1]
-
- when matches = line.match(/^MN = (.*)/)
- current_heading.tree_numbers << matches[1]
- current_heading.roots << matches[1][0] unless current_heading.roots.include?(matches[1][0])
-
- when matches = line.match(/^MS = (.*)/)
- current_heading.set_summary(matches[1])
-
- when matches = line.match(/^DC = (.*)/)
- current_heading.descriptor_class = @@descriptor_classes[matches[1].to_i]
-
- when matches = line.match(/^ST = (.*)/)
- current_heading.semantic_types << MESH::SemanticTypes[matches[1]]
-
- when matches = line.match(/^MH = (.*)/)
- mh = matches[1]
- current_heading.set_original_heading(mh)
- current_heading.entries << mh unless current_heading.entries.include? mh
- librarian_parts = mh.match(/(.*), (.*)/)
- nln = librarian_parts.nil? ? mh : "#{librarian_parts[2]} #{librarian_parts[1]}"
- current_heading.set_natural_language_name(nln)
-
- # when matches = line.match(/^(?:PRINT )?ENTRY = ([^|]+)/)
- # entry = matches[1].chomp
- # current_heading.entries << entry unless current_heading.entries.include? entry
- #
- when matches = line.match(/^(?:PRINT )?ENTRY = (.*)/)
- entry = matches[1]
- term = entry.match(/([^|]+)/)
- current_heading.entries << term[1] unless current_heading.entries.include? term[1]
- current_heading.structured_entries << MESH::Entry.new(current_heading, entry)
-
+ else
+ lines << line
end
-
end
- @by_unique_id.each do |id, heading|
- heading.tree_numbers.each do |tree_number|
- #D03.438.221.173
- parts = tree_number.split('.')
- if parts.size > 1
- parts.pop
- parent_tree_number = parts.join '.'
- parent = @by_tree_number[parent_tree_number]
- heading.parents << parent unless parent.nil? || heading.parents.include?(parent)
- parent.children << heading unless parent.nil? || parent.children.include?(heading)
- end
- end
+ @headings.each do |heading|
+ heading.connect_to_parents
+ heading.connect_to_forward_references
end
+ end
+
+ def add_heading_to_hashes(mh)
+ @headings << mh
+ @by_unique_id[mh.unique_id] = mh
+ @by_original_heading[mh.original_heading] = mh
+ mh.tree_numbers.each do |tree_number|
+ raise if @by_tree_number[tree_number]
+ @by_tree_number[tree_number] = mh
+ end
+ match_headings = mh.entries.map { |e| entry_match_key(e) }.uniq
+ match_headings.each do |entry|
+ raise if @by_entry[entry]
+ @by_entry[entry] = mh
+ end
end
# Builds the key under which an entry term is indexed and looked up:
# leading/trailing whitespace is removed and the text is upper-cased.
# The argument itself is left unmodified.
#
# @param e [String] the raw entry term
# @return [String] a new, trimmed and upper-cased string
def entry_match_key(e)
  trimmed = e.strip
  trimmed.upcase
end
\ No newline at end of file