lib/icu_name.rb in icu_name-0.0.3 vs lib/icu_name.rb in icu_name-0.0.4

- old
+ new

@@ -1,230 +1,2 @@ -module ICU - class Name - attr_reader :first, :last - - # Construct from one or two strings or any objects that have a to_s method. - def initialize(name1='', name2='') - @name1 = name1.to_s - @name2 = name2.to_s - canonicalize - end - - # Return a complete name, first name first, no comma. - def name - name = '' - name << @first - name << ' ' if @first.length > 0 && @last.length > 0 - name << @last - name - end - - # Return a reversed complete name, first name last after a comma. - def rname - name = '' - name << @last - name << ', ' if @first.length > 0 && @last.length > 0 - name << @first - name - end - - # Convert object to a string. - def to_s - rname - end - - # Match another name to this object, returning true or false. - def match(name1='', name2='') - other = Name.new(name1, name2) - match_first(first, other.first) && match_last(last, other.last) - end - - private - - # Canonicalise the first and last names. - def canonicalize - first, last = partition - @first = finish_first(first) - @last = finish_last(last) - end - - # Split one complete name into first and last parts. - def partition - if @name2.length == 0 - # Only one imput so we must split first and last. - parts = @name1.split(/,/) - if parts.size > 1 - last = clean(parts.shift || '') - first = clean(parts.join(' ')) - else - parts = clean(@name1).split(/ /) - last = parts.pop || '' - first = parts.join(' ') - end - else - # Two inputs, so we are given first and last. - first = clean(@name1) - last = clean(@name2) - end - [first, last] - end - - # Clean up characters in any name. - def clean(name) - name.gsub!(/`/, "'") - name.gsub!(/[^-a-zA-Z.'\s]/, '') - name.gsub!(/\./, ' ') - name.gsub!(/\s*-\s*/, '-') - name.gsub!(/'+/, "'") - name.strip.downcase.split(/\s+/).map do |n| - n.sub!(/^-+/, '') - n.sub!(/-+$/, '') - n.split(/-/).map do |p| - p.capitalize! - end.join('-') - end.join(' ') - end - - # Apply final touches to finish canonicalising a first name. - def finish_first(names) - names.gsub(/([A-Z])\b/, '\1.') - end - - # Apply final touches to finish canonicalising a last name. - def finish_last(names) - names.gsub!(/\b([A-Z])'([a-z])/) { |m| $1 << "'" << $2.upcase} - names.gsub!(/\bMc([a-z])/) { |m| 'Mc' << $1.upcase} - names.gsub!(/\bMac([a-z])/) do |m| - letter = $1 - 'Mac'.concat(@name2.match("[mM][aA][cC]#{letter}") ? letter : letter.upcase) - end - names.gsub!(/\bO ([A-Z])/) { |m| "O'" << $1 } - names - end - - # Match a complete first name. - def match_first(first1, first2) - # Is this one a walk in the park? - return true if first1 == first2 - - # No easy ride. Begin by splitting into individual first names. - first1 = split_first(first1) - first2 = split_first(first2) - - # Get the long list and the short list. - long, short = first1.size >= first2.size ? [first1, first2] : [first2, first1] - - # The short one must be a "subset" of the long one. - # An extra condition must also be satisfied. - extra = false - (0..long.size-1).each do |i| - lword = long.shift - score = match_first_name(lword, short.first) - if score >= 0 - short.shift - extra = true if i == 0 || score == 0 - end - break if short.empty? || long.empty? - end - - # There's a match if the following is true. - short.empty? && extra - end - - # Match a complete last name. - def match_last(last1, last2) - return true if last1 == last2 - [last1, last2].each do |last| - last.downcase! # MacDonaugh and Macdonaugh - last.gsub!(/\bmac/, 'mc') # MacDonaugh and McDonaugh - last.tr!('-', ' ') # Lowry-O'Reilly and Lowry O'Reilly - end - last1 == last2 - end - - # Split a complete first name for matching. - def split_first(first) - first.tr!('-', ' ') # J. K. and J.-K. - first = first.split(/ /) # split on spaces - first = [''] if first.size == 0 # in case input was empty string - first - end - - # Match individual first names or initials. - # -1 = no match - # 0 = full match - # 1 = match involving 1 initial - # 2 = match involving 2 initials - def match_first_name(first1, first2) - initials = 0 - initials+= 1 if first1.match(/^[A-Z]\.?$/) - initials+= 1 if first2.match(/^[A-Z]\.?$/) - return initials if first1 == first2 - return 0 if initials == 0 && match_nick_name(first1, first2) - return -1 unless initials > 0 - return initials if first1[0] == first2[0] - -1 - end - - # Match two first names that might be equivalent nicknames. - def match_nick_name(nick1, nick2) - compile_nick_names unless @@nc - code1 = @@nc[nick1] - return false unless code1 - code1 == @@nc[nick2] - end - - # Compile the nick names code hash when matching nick names is first attempted. - def compile_nick_names - @@nc = Hash.new - code = 1 - @@nl.each do |nicks| - nicks.each do |n| - throw "duplicate name #{n}" if @@nc[n] - @@nc[n] = code - end - code+= 1 - end - end - - # A array of data for matching nicknames and also a few common misspellings. - @@nc = nil - @@nl = <<EOF.split(/\n/).reject{|x| x.length == 0 }.map{|x| x.split(' ')} -Abdul Abul -Alexander Alex -Anandagopal Ananda -Anne Ann -Anthony Tony -Benjamin Ben -Catherine Cathy Cath -Daniel Danial Danny Dan -David Dave -Deborah Debbie -Des Desmond -Eamonn Eamon -Edward Eddie Ed -Eric Erick Erik -Frederick Frederic Fred -Gerald Gerry -Gerhard Gerard Ger -James Jim -Joanna Joan Joanne -John Johnny -Jonathan Jon -Kenneth Ken Kenny -Michael Mike Mick Micky -Nicholas Nick Nicolas -Nicola Nickie Nicky -Patrick Pat Paddy -Peter Pete -Philippe Philip Phillippe Phillip -Rick Ricky -Robert Bob Bobby -Samual Sam Samuel -Stefanie Stef -Stephen Steven Steve -Terence Terry -Thomas Tom Tommy -William Will Willy Willie Bill -EOF - end -end +require 'icu_name/name.rb' +require 'icu_name/util.rb' \ No newline at end of file