lib/namor/namor.rb in namor-0.3.0 vs lib/namor/namor.rb in namor-0.3.1

- old
+ new

@@ -17,48 +17,72 @@ # * squeeze whitespace & trim spaces from ends def scrub(name, opts = {}) suppression_list = @config[:suppress] || [] suppression_re = Regexp.new('\b?' + (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|') + '\b?') - name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(suppression_re, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip + name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(suppression_re, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip end + def fullscrub(name) + final_cleaning(scrub(name)) + end + + def demaiden(lastname) + return [nil,nil] unless lastname && !lastname.empty? + if lastname =~ /\-/ + [lastname.gsub(/ /, ''), lastname.split(/\-/).last.gsub(/ /, '')] + else + [lastname.gsub(/ /, ''), lastname.split(/ /).last] + end + end + + def final_cleaning(name) + if name && !name.empty? + name.gsub(/\-/, '') + else + nil + end + end + def extract(name, opts = {}) return [] if name.nil? detitled_name = scrub(name, opts) if detitled_name =~ /,/ # "last, first[ middle]" lastname, firstname = detitled_name.split(/\s*,\s*/) - lastname.gsub!(/ /, '') + lastname, de_maidened_last = demaiden(lastname) middlename = nil if firstname && firstname =~ / / pieces = firstname.split(/ +/) firstname = pieces.shift middlename = pieces.join if pieces.any? end else - # "first [middle ]last" + # "first [middle-initial ]last" or "first everything-else-is-the-lastname" pieces = detitled_name.split(' ') firstname = pieces.shift - middlename = nil if pieces.count > 1 && pieces.first.length == 1 # assume this is a middle initial middlename = pieces.shift + else + middlename = nil end - lastname = pieces.join + lastname, de_maidened_last = demaiden(pieces.join(' ')) end - firstname = nil if firstname.empty? - middlename = nil if middlename && middlename.empty? - lastname = nil if lastname.empty? + firstname = final_cleaning(firstname) + middlename = final_cleaning(middlename) + lastname = final_cleaning(lastname) + de_maidened_last = final_cleaning(de_maidened_last) fm = [firstname, middlename].compact.join(' ') fullname = [lastname, fm].compact.join(',') + nee_fullname = [de_maidened_last, fm].compact.join(',') - [firstname, middlename, lastname, fullname] + [firstname, middlename, lastname, fullname, nee_fullname] end def extract_with_cluster(name, opts = {}) ary = extract(name, opts) return [] if ary.empty?