module ICU class Name attr_reader :first, :last # Construct from one or two strings or any objects that have a to_s method. def initialize(name1='', name2='') @name1 = name1.to_s @name2 = name2.to_s canonicalize end # Return a complete name, first name first, no comma. def name name = '' name << @first name << ' ' if @first.length > 0 && @last.length > 0 name << @last name end # Return a reversed complete name, first name last after a comma. def rname name = '' name << @last name << ', ' if @first.length > 0 && @last.length > 0 name << @first name end # Convert object to a string. def to_s rname end # Match another name to this object, returning true or false. def match(name1='', name2='') other = Name.new(name1, name2) match_first(first, other.first) && match_last(last, other.last) end private # Canonicalise the first and last names. def canonicalize first, last = partition @first = finish_first(first) @last = finish_last(last) end # Split one complete name into first and last parts. def partition if @name2.length == 0 # Only one imput so we must split first and last. parts = @name1.split(/,/) if parts.size > 1 last = clean(parts.shift || '') first = clean(parts.join(' ')) else parts = clean(@name1).split(/ /) last = parts.pop || '' first = parts.join(' ') end else # Two inputs, so we are given first and last. first = clean(@name1) last = clean(@name2) end [first, last] end # Clean up characters in any name. def clean(name) name.gsub!(/`/, "'") name.gsub!(/[^-a-zA-Z.'\s]/, '') name.gsub!(/\./, ' ') name.gsub!(/\s*-\s*/, '-') name.gsub!(/'+/, "'") name.strip.downcase.split(/\s+/).map do |n| n.sub!(/^-+/, '') n.sub!(/-+$/, '') n.split(/-/).map do |p| p.capitalize! end.join('-') end.join(' ') end # Apply final touches to finish canonicalising a first name. def finish_first(names) names.gsub(/([A-Z])\b/, '\1.') end # Apply final touches to finish canonicalising a last name. 
# Restores the capital after an apostrophe or a Mc/Mac prefix and turns
# "O Xyz" into "O'Xyz". Edits +names+ in place and returns it.
def finish_last(names)
  names.gsub!(/\b([A-Z])'([a-z])/) { "#{Regexp.last_match(1)}'#{Regexp.last_match(2).upcase}" }
  names.gsub!(/\bMc([a-z])/) { "Mc#{Regexp.last_match(1).upcase}" }
  names.gsub!(/\bMac([a-z])/) do
    letter = Regexp.last_match(1)
    # Keep the letter after "Mac" in lower case only if the second
    # constructor argument spelled it that way; otherwise capitalise it.
    keep_lower = @name2 =~ /[mM][aA][cC]#{letter}/
    "Mac#{keep_lower ? letter : letter.upcase}"
  end
  names.gsub!(/\bO ([A-Z])/) { "O'#{Regexp.last_match(1)}" }
  names
end

# Match a complete first name.
# Returns true or false. Neither argument is modified.
def match_first(first1, first2)
  # Is this one a walk in the park?
  return true if first1 == first2

  # No easy ride. Begin by splitting into individual first names.
  names1 = split_first(first1)
  names2 = split_first(first2)

  # Get the long list and the short list.
  long, short = names1.size >= names2.size ? [names1, names2] : [names2, names1]

  # The short one must be a "subset" of the long one.
  # An extra condition must also be satisfied: either the very first name
  # of the long list matched, or at least one match was a full match.
  extra = false
  long.each_with_index do |lword, i|
    score = match_first_name(lword, short.first)
    if score >= 0
      short.shift
      extra = true if i == 0 || score == 0
    end
    break if short.empty?
  end

  # There's a match if the following is true.
  short.empty? && extra
end

# Match a complete last name.
# Returns true or false. The comparison is made on normalised copies, so
# the strings passed in are never modified.
def match_last(last1, last2)
  return true if last1 == last2
  norm1, norm2 = [last1, last2].map do |last|
    last.downcase.           # MacDonaugh and Macdonaugh
      gsub(/\bmac/, 'mc').   # MacDonaugh and McDonaugh
      tr('-', ' ')           # Lowry-O'Reilly and Lowry O'Reilly
  end
  norm1 == norm2
end

# Split a complete first name for matching.
# Returns an array with at least one element; the argument is not modified.
def split_first(first)
  parts = first.tr('-', ' ').split(/ /)   # J. K. and J.-K.
  parts = [''] if parts.empty?            # in case input was empty string
  parts
end

# Match individual first names or initials.
# -1 = no match # 0 = full match # 1 = match involving 1 initial # 2 = match involving 2 initials def match_first_name(first1, first2) initials = 0 initials+= 1 if first1.match(/^[A-Z]\.?$/) initials+= 1 if first2.match(/^[A-Z]\.?$/) return initials if first1 == first2 return 0 if initials == 0 && match_nick_name(first1, first2) return -1 unless initials > 0 return initials if first1[0] == first2[0] -1 end # Match two first names that might be equivalent nicknames. def match_nick_name(nick1, nick2) compile_nick_names unless @@nc code1 = @@nc[nick1] return false unless code1 code1 == @@nc[nick2] end # Compile the nick names code hash when matching nick names is first attempted. def compile_nick_names @@nc = Hash.new code = 1 @@nl.each do |nicks| nicks.each do |n| throw "duplicate name #{n}" if @@nc[n] @@nc[n] = code end code+= 1 end end # A array of data for matching nicknames and also a few common misspellings. @@nc = nil @@nl = <