lib/namor/namor.rb in namor-0.5.4 vs lib/namor/namor.rb in namor-0.6.0

- old
+ new

@@ -18,10 +18,15 @@ # '\b' + s.upcase.chomp('.') + '\b' # end # Regexp.new(bits.join('|')) end + def suppress(name, supplist) + @re_cache[supplist] ||= suppression_re(supplist) + name && name.upcase.gsub(@re_cache[supplist], '') + end + # clean up a single name component # * output all converted to uppercase # * strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes) # * remove any words that are in the user-provided suppression list # * remove words from list of common suffixes (Jr, Sr etc) @@ -29,11 +34,11 @@ # * remove punctuation # * squeeze whitespace & trim spaces from ends def scrub(name, opts = {}) @re_cache[opts[:suppress]] ||= suppression_re(opts[:suppress]) - name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(@re_cache[opts[:suppress]], '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip + name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(@re_cache[opts[:suppress]], '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\)]*\)/, '').gsub(/\[[^\]]*\]/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip end def fullscrub(name, opts = {}) final_cleaning(scrub(name, opts)) end @@ -42,12 +47,13 @@ def scrub_and_squash(name, opts = {}) s = scrub(name, opts) s && s.gsub(/[- ]/, '') end - def demaiden(lastname) + def demaiden(lastname, opts = {}) return [nil,nil] unless lastname && !lastname.empty? + lastname = suppress(lastname, opts[:suppress]) if opts[:suppress] if lastname =~ /\-/ [lastname.upcase.gsub(/ /, ''), lastname.split(/\-/).last.gsub(/ /, '')] else [lastname.upcase.gsub(/ /, ''), lastname.split(/ /).last] end @@ -112,20 +118,20 @@ return [] if ary.empty? ary << ary[3].gsub(/\W/, '_') ary << ary[4].gsub(/\W/, '_') end - def extract_from_pieces(hash) + def extract_from_pieces(hash, opts = {}) assemble( - scrub(hash[:first]), - scrub(hash[:middle]), - scrub_and_squash(hash[:last]), - scrub_and_squash((s = demaiden(hash[:last])) && s.last) + scrub(hash[:first], opts), + scrub(hash[:middle], opts), + scrub_and_squash(hash[:last], opts), + scrub_and_squash((s = demaiden(hash[:last], opts)) && s.last, opts) ) end - def extract_from_pieces_with_cluster(hash) - ary = extract_from_pieces(hash) + def extract_from_pieces_with_cluster(hash, opts = {}) + ary = extract_from_pieces(hash, opts) ary << ary[3].gsub(/\W/, '_') ary << ary[4].gsub(/\W/, '_') end def components(*args)