lib/MESH/translator.rb in mesh-medical-subject-headings-1.2.2 vs lib/MESH/translator.rb in mesh-medical-subject-headings-1.3.0

- old
+ new

@@ -1,20 +1,37 @@ module MESH class Translator + attr_accessor :dictionary + def translate(input) + return nil if input.nil? input = input.clone - @enus_to_engb.each do |match, replacement| - start_middle_and_end(input, match.downcase, replacement.downcase) - start_middle_and_end(input, match.capitalize, replacement.capitalize) - start_middle_and_end(input, match.upcase, replacement.upcase) - end + @downcased.each { |match, replacement| input.gsub!(match, replacement) } + @capitalized.each { |match, replacement| input.gsub!(match, replacement) } + @upcased.each { |match, replacement| input.gsub!(match, replacement) } input end - def initialize - @enus_to_engb = { + def initialize(dictionary) + @dictionary = dictionary + @downcased = {} + @capitalized = {} + @upcased = {} + dictionary.each do |match,replacement| + @downcased[/(^|\W)#{Regexp.quote(match.downcase)}(\W|$)/] = "\\1#{replacement.downcase}\\2" + @capitalized[/(^|\W)#{Regexp.quote(match.capitalize)}(\W|$)/] = "\\1#{replacement.capitalize}\\2" + @upcased[/(^|\W)#{Regexp.quote(match.upcase)}(\W|$)/] = "\\1#{replacement.upcase}\\2" + end + end + + def self.engb_to_enus + @@engb_to_enus ||= @@enus_to_engb.invert + end + + def self.enus_to_engb + @@enus_to_engb ||= { 'abrigment' => 'abrigement', 'acknowledgment' => 'acknowledgement', 'airplane' => 'aeroplane', 'aluminum' => 'aluminium', 'amortize' => 'amortise', @@ -59,10 +76,11 @@ 'genuflection' => 'genuflexion', 'gonorrhea' => 'gonorrhoea', 'gynecology' => 'gynaecology', 'harbor' => 'harbour', 'hematemesis' => 'haematemesis', + 'hematology' => 'haematology', 'hemoglobin' => 'haemoglobin', 'hemorrhoid' => 'haemorrhoid', 'homeopath' => 'homoeopath', 'honor' => 'honour', 'humor' => 'humour', @@ -108,19 +126,9 @@ 'tumor' => 'tumour', 'urohematoporphyrin' => 'urohaematoporphyrin', 'vapor' => 'vapour', 'vaporize' => 'vaporise' } - - end - - private - - def start_middle_and_end(input, match, replacement) - input.gsub!(/^#{Regexp.quote(match)}$/, replacement) #alone - input.gsub!(/^#{Regexp.quote(match)}(\W+)/) { "#{replacement}#{$1}" } #start - input.gsub!(/(\W+)#{Regexp.quote(match)}(\W+)/) { "#{$1}#{replacement}#{$2}" } #middle - input.gsub!(/(\W+)#{Regexp.quote(match)}$/) { "#{$1}#{replacement}" } #end end end end